diff --git a/models/gemma-3-27b-it.yaml b/models/gemma-3-27b-it.yaml new file mode 100644 index 000000000..f50716b49 --- /dev/null +++ b/models/gemma-3-27b-it.yaml @@ -0,0 +1,26 @@ +--- +model_id: gemma-3-27b-it +domain: open_router +description: Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it) +categories: + - openrouter + - cloud +urls: + - https://openrouter.ai/google/gemma-3-27b-it +config_entry_data: + api_key: !secret openrouter_api_key +subentries_data: + - subentry_type: conversation + title: Mock Title + data: + model: google/gemma-3-27b-it + llm_hass_api: assist + - subentry_type: ai_task_data + title: Mock Title + data: + model: google/gemma-3-27b-it + llm_hass_api: assist +rpm: 250 +cost: + input_tokens: 0.04 + output_tokens: 0.15 diff --git a/reports/README.md b/reports/README.md index 9b45a9a92..ab33844c7 100644 --- a/reports/README.md +++ b/reports/README.md @@ -30,6 +30,7 @@ | qwen3-4b-instruct-2507-iq4-nl | $${71.3\\% \space\color{gray}\tiny{\textsf{(CI: 4.1, 2025.12.4)}}}$$ | | | | $${71.3\\% \space\color{gray}\tiny{\textsf{(CI: 4.1, avg)}}}$$ | | gemini-2.0-flash-lite | $${65.9\\% \space\color{gray}\tiny{\textsf{(CI: 4.3, 2025.4.3)}}}$$ | $${88.3\\% \space\color{gray}\tiny{\textsf{(CI: 4.5, 2025.4.3)}}}$$ | $${63.2\\% \space\color{gray}\tiny{\textsf{(CI: 4.9, 2025.5.0.dev0)}}}$$ | $${53.3\\% \space\color{gray}\tiny{\textsf{(CI: 12.6, 2025.4.3)}}}$$ | $${69.2\\% \space\color{gray}\tiny{\textsf{(CI: 2.8, avg)}}}$$ | | qwen3-1.7b | $${35.9\\% \space\color{gray}\tiny{\textsf{(CI: 4.4, 2025.7.1)}}}$$ | $${60.2\\% \space\color{gray}\tiny{\textsf{(CI: 6.9, 2025.7.1)}}}$$ | $${59.5\\% \space\color{gray}\tiny{\textsf{(CI: 5.0, 2025.7.1)}}}$$ | $${0.0\\% \space\color{gray}\tiny{\textsf{(CI: 0.0, 2025.7.1)}}}$$ | $${49.0\\% \space\color{gray}\tiny{\textsf{(CI: 3.1, avg)}}}$$ | +| gemma-3-27b-it | $${8.7\\% \space\color{gray}\tiny{\textsf{(CI: 2.6, 2026.2.2)}}}$$ | $${0.0\\% \space\color{gray}\tiny{\textsf{(CI: 0.0, 2026.2.2)}}}$$ | $${2.7\\% \space\color{gray}\tiny{\textsf{(CI: 1.7, 2026.2.2)}}}$$ | $${0.0\\% \space\color{gray}\tiny{\textsf{(CI: 0.0, 2026.2.2)}}}$$ | $${4.9\\% \space\color{gray}\tiny{\textsf{(CI: 1.3, avg)}}}$$ | Implementation notes: - CI is large given small number of samples in the datasets. @@ -396,6 +397,16 @@ More information: - https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/ +### gemma-3-27b-it + +Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it) + + + +More information: +- https://openrouter.ai/google/gemma-3-27b-it + + ### glm-4.7-flash As the strongest model in the 30B class, GLM-4.7-Flash offers a new option for lightweight deployment that balances performance and efficiency. diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/_scrape_context.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/_scrape_context.yaml new file mode 100644 index 000000000..5217a8599 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/_scrape_context.yaml @@ -0,0 +1,19 @@ +--- +uuid: c7db25c0-6809-4836-9f20-7159a7586d54 +timestamp: 2026-02-16 08:07:44.693575 +scrape_config: + dataset: assist-mini + dataset_path: datasets/assist-mini + dataset_version: v1 + model_id: gemma-3-27b-it + model_output_path: reports/assist-mini/2026.2.2 +version: 2026.2.2 +context: + user: runner + argv: + - /home/runner/work/openrouter-benchmarks/openrouter-benchmarks/.venv/bin/pytest + - home_assistant_datasets/tool/assist/collect + - --models=gemma-3-27b-it + - --dataset=datasets/assist-mini/ + - --model_output_dir=reports/assist-mini/2026.2.2 +notes: '' diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-dining_room_light_off-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-dining_room_light_off-0.yaml new file mode 100644 index 000000000..79c593af7 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-dining_room_light_off-0.yaml @@ -0,0 +1,141 @@ +--- +uuid: bd22dce8-af05-417a-a549-3bbc30b9b9b7 +task_id: dom1_pl_lights_lights-dining_room_light_off-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Dining room light off + expect_changes: + light.dining_room_light: + state: 'off' + attributes: + brightness: null + color_mode: null +response: Error talking to API +context: + unexpected_states: + light.dining_room_light: + expected: + brightness: null + state: 'off' + color_mode: null + got: + brightness: 100 + state: 'on' + color_mode: brightness + conversation_trace: + - event_type: async_process + data: + text: Dining room light off + context: + id: 01KHJQVN6YMJSSGHMWSCTKXE9H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:51.262251+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:51.263122+00:00 + - role: user + content: Dining room light off + attachments: null + created: 2026-02-16 08:07:51.262322+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:51.263131+00:00 + duration_ms: 91.297 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-dining_room_light_off-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-dining_room_light_off-1.yaml new file mode 100644 index 000000000..834a40696 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-dining_room_light_off-1.yaml @@ -0,0 +1,141 @@ +--- +uuid: 7ffd7d3f-0143-4243-b4c1-210143af7686 +task_id: dom1_pl_lights_lights-dining_room_light_off-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Dining room light off + expect_changes: + light.dining_room_light: + state: 'off' + attributes: + brightness: null + color_mode: null +response: Error talking to API +context: + unexpected_states: + light.dining_room_light: + expected: + brightness: null + state: 'off' + color_mode: null + got: + brightness: 100 + state: 'on' + color_mode: brightness + conversation_trace: + - event_type: async_process + data: + text: Dining room light off + context: + id: 01KHJQVNEVDB42XS98N4EDSFT1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:51.516027+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:51.516775+00:00 + - role: user + content: Dining room light off + attachments: null + created: 2026-02-16 08:07:51.516121+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:51.516784+00:00 + duration_ms: 81.782 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-dining_room_light_off-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-dining_room_light_off-2.yaml new file mode 100644 index 000000000..cb7932f0c --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-dining_room_light_off-2.yaml @@ -0,0 +1,141 @@ +--- +uuid: 2ce4cb49-4350-4495-8245-9d7b79a0bb61 +task_id: dom1_pl_lights_lights-dining_room_light_off-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Dining room light off + expect_changes: + light.dining_room_light: + state: 'off' + attributes: + brightness: null + color_mode: null +response: Error talking to API +context: + unexpected_states: + light.dining_room_light: + expected: + brightness: null + state: 'off' + color_mode: null + got: + brightness: 100 + state: 'on' + color_mode: brightness + conversation_trace: + - event_type: async_process + data: + text: Dining room light off + context: + id: 01KHJQVNPNXKQ6BRVJS8VW0KSD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:51.765196+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:51.765941+00:00 + - role: user + content: Dining room light off + attachments: null + created: 2026-02-16 08:07:51.765267+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:51.765950+00:00 + duration_ms: 86.943 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-dining_room_light_off-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-dining_room_light_off-3.yaml new file mode 100644 index 000000000..4a9ca82e4 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-dining_room_light_off-3.yaml @@ -0,0 +1,141 @@ +--- +uuid: 8c164b12-6fe3-4b0f-842c-4a7fd79b7a15 +task_id: dom1_pl_lights_lights-dining_room_light_off-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Dining room light off + expect_changes: + light.dining_room_light: + state: 'off' + attributes: + brightness: null + color_mode: null +response: Error talking to API +context: + unexpected_states: + light.dining_room_light: + expected: + brightness: null + state: 'off' + color_mode: null + got: + brightness: 100 + state: 'on' + color_mode: brightness + conversation_trace: + - event_type: async_process + data: + text: Dining room light off + context: + id: 01KHJQVNY8QYJSGYSCF1ME4XDA + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:52.008663+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:52.009405+00:00 + - role: user + content: Dining room light off + attachments: null + created: 2026-02-16 08:07:52.008732+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:52.009414+00:00 + duration_ms: 85.02 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-kitchen_light_off-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-kitchen_light_off-0.yaml new file mode 100644 index 000000000..70444a321 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-kitchen_light_off-0.yaml @@ -0,0 +1,141 @@ +--- +uuid: b59ef8db-8c63-442b-8c01-b069608b194a +task_id: dom1_pl_lights_lights-kitchen_light_off-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Kitchen light off + expect_changes: + light.kitchen_light: + state: 'off' + attributes: + brightness: null + color_mode: null +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + brightness: null + state: 'off' + color_mode: null + got: + brightness: 100 + state: 'on' + color_mode: brightness + conversation_trace: + - event_type: async_process + data: + text: Kitchen light off + context: + id: 01KHJQVKTVMHNJV7BH92KAK7GC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:49.851327+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:49.852050+00:00 + - role: user + content: Kitchen light off + attachments: null + created: 2026-02-16 08:07:49.851397+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:49.852059+00:00 + duration_ms: 85.598 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-kitchen_light_off-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-kitchen_light_off-1.yaml new file mode 100644 index 000000000..85497c0ed --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-kitchen_light_off-1.yaml @@ -0,0 +1,141 @@ +--- +uuid: e567e726-8c92-411d-af28-d35872357166 +task_id: dom1_pl_lights_lights-kitchen_light_off-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Kitchen light off + expect_changes: + light.kitchen_light: + state: 'off' + attributes: + brightness: null + color_mode: null +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + brightness: null + state: 'off' + color_mode: null + got: + brightness: 100 + state: 'on' + color_mode: brightness + conversation_trace: + - event_type: async_process + data: + text: Kitchen light off + context: + id: 01KHJQVM2KXEX9P25GP3KRNE1V + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:50.099457+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:50.100222+00:00 + - role: user + content: Kitchen light off + attachments: null + created: 2026-02-16 08:07:50.099528+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:50.100231+00:00 + duration_ms: 87.928 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-kitchen_light_off-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-kitchen_light_off-2.yaml new file mode 100644 index 000000000..3c8dace80 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-kitchen_light_off-2.yaml @@ -0,0 +1,141 @@ +--- +uuid: 485b08af-b4e9-4492-a8a3-f0f08290d374 +task_id: dom1_pl_lights_lights-kitchen_light_off-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Kitchen light off + expect_changes: + light.kitchen_light: + state: 'off' + attributes: + brightness: null + color_mode: null +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + brightness: null + state: 'off' + color_mode: null + got: + brightness: 100 + state: 'on' + color_mode: brightness + conversation_trace: + - event_type: async_process + data: + text: Kitchen light off + context: + id: 01KHJQVMC23RGYA9BTX4HNYHX6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:50.402705+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:50.403476+00:00 + - role: user + content: Kitchen light off + attachments: null + created: 2026-02-16 08:07:50.402774+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:50.403486+00:00 + duration_ms: 83.591 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-kitchen_light_off-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-kitchen_light_off-3.yaml new file mode 100644 index 000000000..56d102137 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-kitchen_light_off-3.yaml @@ -0,0 +1,141 @@ +--- +uuid: 916294fc-26a0-4e57-b0fb-6957e8b5b5cd +task_id: dom1_pl_lights_lights-kitchen_light_off-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Kitchen light off + expect_changes: + light.kitchen_light: + state: 'off' + attributes: + brightness: null + color_mode: null +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + brightness: null + state: 'off' + color_mode: null + got: + brightness: 100 + state: 'on' + color_mode: brightness + conversation_trace: + - event_type: async_process + data: + text: Kitchen light off + context: + id: 01KHJQVMMFAZ41FWYBE2N7WMWV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:50.671942+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:50.672718+00:00 + - role: user + content: Kitchen light off + attachments: null + created: 2026-02-16 08:07:50.672014+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:50.672727+00:00 + duration_ms: 115.325 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-please_turn_on_the_kitchen_light-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-please_turn_on_the_kitchen_light-0.yaml new file mode 100644 index 000000000..05333e26a --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-please_turn_on_the_kitchen_light-0.yaml @@ -0,0 +1,141 @@ +--- +uuid: 02031e0f-da65-4340-81fa-49ae6bfe2452 +task_id: dom1_pl_lights_lights-please_turn_on_the_kitchen_light-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Please turn on the kitchen light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: + brightness: 0 + color_mode: brightness +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + brightness: 0 + state: 'on' + color_mode: brightness + got: + brightness: null + state: 'off' + color_mode: null + conversation_trace: + - event_type: async_process + data: + text: Please turn on the kitchen light + context: + id: 01KHJQVGGZBZ9FHMGENEE48HG1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:46.463485+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:46.464349+00:00 + - role: user + content: Please turn on the kitchen light + attachments: null + created: 2026-02-16 08:07:46.463574+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:46.464364+00:00 + duration_ms: 94.701 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-please_turn_on_the_kitchen_light-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-please_turn_on_the_kitchen_light-1.yaml new file mode 100644 index 000000000..b1dff116f --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-please_turn_on_the_kitchen_light-1.yaml @@ -0,0 +1,141 @@ +--- +uuid: 006c003c-9ff9-4a64-9d7a-e9f8a2e63212 +task_id: dom1_pl_lights_lights-please_turn_on_the_kitchen_light-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Please turn on the kitchen light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: + brightness: 0 + color_mode: brightness +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + brightness: 0 + state: 'on' + color_mode: brightness + got: + brightness: null + state: 'off' + color_mode: null + conversation_trace: + - event_type: async_process + data: + text: Please turn on the kitchen light + context: + id: 01KHJQVGSWHMRKTAFVPT8NF3MT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:46.748400+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:46.749279+00:00 + - role: user + content: Please turn on the kitchen light + attachments: null + created: 2026-02-16 08:07:46.748494+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:46.749293+00:00 + duration_ms: 91.272 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-please_turn_on_the_kitchen_light-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-please_turn_on_the_kitchen_light-2.yaml new file mode 100644 index 000000000..db1e2e8e9 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-please_turn_on_the_kitchen_light-2.yaml @@ -0,0 +1,141 @@ +--- +uuid: 1a9b39e1-b631-45ef-9a83-6ea76e0f79d9 +task_id: dom1_pl_lights_lights-please_turn_on_the_kitchen_light-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Please turn on the kitchen light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: + brightness: 0 + color_mode: brightness +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + brightness: 0 + state: 'on' + color_mode: brightness + got: + brightness: null + state: 'off' + color_mode: null + conversation_trace: + - event_type: async_process + data: + text: Please turn on the kitchen light + context: + id: 01KHJQVH26GMGKJ994RGM44Z3S + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:47.014510+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:47.015284+00:00 + - role: user + content: Please turn on the kitchen light + attachments: null + created: 2026-02-16 08:07:47.014586+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:47.015293+00:00 + duration_ms: 88.92 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-please_turn_on_the_kitchen_light-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-please_turn_on_the_kitchen_light-3.yaml new file mode 100644 index 000000000..ab52b0e1e --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-please_turn_on_the_kitchen_light-3.yaml @@ -0,0 +1,141 @@ +--- +uuid: ad884eb0-e179-4182-8908-9d3ab5b33dd0 +task_id: dom1_pl_lights_lights-please_turn_on_the_kitchen_light-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Please turn on the kitchen light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: + brightness: 0 + color_mode: brightness +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + brightness: 0 + state: 'on' + color_mode: brightness + got: + brightness: null + state: 'off' + color_mode: null + conversation_trace: + - event_type: async_process + data: + text: Please turn on the kitchen light + context: + id: 01KHJQVHAKMTB5GKCHCFASPJXG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:47.283788+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:47.284586+00:00 + - role: user + content: Please turn on the kitchen light + attachments: null + created: 2026-02-16 08:07:47.283857+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:47.284595+00:00 + duration_ms: 91.927 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_backyard_light-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_backyard_light-0.yaml new file mode 100644 index 000000000..311042b46 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_backyard_light-0.yaml @@ -0,0 +1,141 @@ +--- +uuid: 8b0ad14c-014c-4733-992a-cd3db8fd7498 +task_id: dom1_pl_lights_lights-turn_on_the_backyard_light-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the Backyard light + expect_changes: + light.garden_light: + state: 'on' + attributes: + brightness: 0 + color_mode: brightness +response: Error talking to API +context: + unexpected_states: + light.garden_light: + expected: + brightness: 0 + state: 'on' + color_mode: brightness + got: + brightness: null + state: 'off' + color_mode: null + conversation_trace: + - event_type: async_process + data: + text: Turn on the Backyard light + context: + id: 01KHJQVJV01HN7B74XXJGH6JF0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:48.832307+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:48.833022+00:00 + - role: user + content: Turn on the Backyard light + attachments: null + created: 2026-02-16 08:07:48.832376+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:48.833031+00:00 + duration_ms: 91.324 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_backyard_light-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_backyard_light-1.yaml new file mode 100644 index 000000000..1e93bffee --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_backyard_light-1.yaml @@ -0,0 +1,141 @@ +--- +uuid: ddd82fc6-dffe-4825-b76b-dee3d85a0558 +task_id: dom1_pl_lights_lights-turn_on_the_backyard_light-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the Backyard light + expect_changes: + light.garden_light: + state: 'on' + attributes: + brightness: 0 + color_mode: brightness +response: Error talking to API +context: + unexpected_states: + light.garden_light: + expected: + brightness: 0 + state: 'on' + color_mode: brightness + got: + brightness: null + state: 'off' + color_mode: null + conversation_trace: + - event_type: async_process + data: + text: Turn on the Backyard light + context: + id: 01KHJQVK2VPTKEDAM2JYTD2GAX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:49.083803+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:49.084617+00:00 + - role: user + content: Turn on the Backyard light + attachments: null + created: 2026-02-16 08:07:49.083879+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:49.084628+00:00 + duration_ms: 87.084 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_backyard_light-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_backyard_light-2.yaml new file mode 100644 index 000000000..e1af62a57 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_backyard_light-2.yaml @@ -0,0 +1,141 @@ +--- +uuid: 1e161c6c-4572-4554-aadb-c765b3d9cca8 +task_id: dom1_pl_lights_lights-turn_on_the_backyard_light-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the Backyard light + expect_changes: + light.garden_light: + state: 'on' + attributes: + brightness: 0 + color_mode: brightness +response: Error talking to API +context: + unexpected_states: + light.garden_light: + expected: + brightness: 0 + state: 'on' + color_mode: brightness + got: + brightness: null + state: 'off' + color_mode: null + conversation_trace: + - event_type: async_process + data: + text: Turn on the Backyard light + context: + id: 01KHJQVKAQWFBMNRDYMDYWXAPG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:49.335605+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:49.336396+00:00 + - role: user + content: Turn on the Backyard light + attachments: null + created: 2026-02-16 08:07:49.335674+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:49.336406+00:00 + duration_ms: 86.608 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_backyard_light-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_backyard_light-3.yaml new file mode 100644 index 000000000..30de63fd3 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_backyard_light-3.yaml @@ -0,0 +1,141 @@ +--- +uuid: d6b13567-d9e2-44c5-9a10-ab58e3c35fa6 +task_id: dom1_pl_lights_lights-turn_on_the_backyard_light-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the Backyard light + expect_changes: + light.garden_light: + state: 'on' + attributes: + brightness: 0 + color_mode: brightness +response: Error talking to API +context: + unexpected_states: + light.garden_light: + expected: + brightness: 0 + state: 'on' + color_mode: brightness + got: + brightness: null + state: 'off' + color_mode: null + conversation_trace: + - event_type: async_process + data: + text: Turn on the Backyard light + context: + id: 01KHJQVKJGMBDR4DQJVNV0BTVG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:49.584674+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:49.585478+00:00 + - role: user + content: Turn on the Backyard light + attachments: null + created: 2026-02-16 08:07:49.584744+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:49.585487+00:00 + duration_ms: 90.29 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_living_room_light-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_living_room_light-0.yaml new file mode 100644 index 000000000..a3921979a --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_living_room_light-0.yaml @@ -0,0 +1,141 @@ +--- +uuid: da6376d4-070d-443f-9333-c7151cc7fdd8 +task_id: dom1_pl_lights_lights-turn_on_the_living_room_light-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the living room light + expect_changes: + light.living_room_light: + state: 'on' + attributes: + brightness: 0 + color_mode: brightness +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + brightness: 0 + state: 'on' + color_mode: brightness + got: + brightness: null + state: 'off' + color_mode: null + conversation_trace: + - event_type: async_process + data: + text: Turn on the living room light + context: + id: 01KHJQVHKTMCQFG89EM806BV5H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:47.578924+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:47.579707+00:00 + - role: user + content: Turn on the living room light + attachments: null + created: 2026-02-16 08:07:47.578996+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:47.579716+00:00 + duration_ms: 314.078 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_living_room_light-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_living_room_light-1.yaml new file mode 100644 index 000000000..c0f3453e8 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_living_room_light-1.yaml @@ -0,0 +1,141 @@ +--- +uuid: b2e27930-023f-44b3-8724-df4e2482f79b +task_id: dom1_pl_lights_lights-turn_on_the_living_room_light-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the living room light + expect_changes: + light.living_room_light: + state: 'on' + attributes: + brightness: 0 + color_mode: brightness +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + brightness: 0 + state: 'on' + color_mode: brightness + got: + brightness: null + state: 'off' + color_mode: null + conversation_trace: + - event_type: async_process + data: + text: Turn on the living room light + context: + id: 01KHJQVJ33VTX3J7V4HET4DJ79 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:48.067627+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:48.068380+00:00 + - role: user + content: Turn on the living room light + attachments: null + created: 2026-02-16 08:07:48.067695+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:48.068390+00:00 + duration_ms: 99.796 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_living_room_light-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_living_room_light-2.yaml new file mode 100644 index 000000000..44653a387 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_living_room_light-2.yaml @@ -0,0 +1,141 @@ +--- +uuid: e6f2151c-acc2-4f58-9b6f-68d67bf2905e +task_id: dom1_pl_lights_lights-turn_on_the_living_room_light-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the living room light + expect_changes: + light.living_room_light: + state: 'on' + attributes: + brightness: 0 + color_mode: brightness +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + brightness: 0 + state: 'on' + color_mode: brightness + got: + brightness: null + state: 'off' + color_mode: null + conversation_trace: + - event_type: async_process + data: + text: Turn on the living room light + context: + id: 01KHJQVJAVXFMVGZM48Q494638 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:48.315617+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:48.316371+00:00 + - role: user + content: Turn on the living room light + attachments: null + created: 2026-02-16 08:07:48.315686+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:48.316381+00:00 + duration_ms: 81.842 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_living_room_light-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_living_room_light-3.yaml new file mode 100644 index 000000000..31834cc73 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_lights_lights-turn_on_the_living_room_light-3.yaml @@ -0,0 +1,141 @@ +--- +uuid: 78c4ba59-94be-41c5-9489-cbebdd44c91c +task_id: dom1_pl_lights_lights-turn_on_the_living_room_light-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the living room light + expect_changes: + light.living_room_light: + state: 'on' + attributes: + brightness: 0 + color_mode: brightness +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + brightness: 0 + state: 'on' + color_mode: brightness + got: + brightness: null + state: 'off' + color_mode: null + conversation_trace: + - event_type: async_process + data: + text: Turn on the living room light + context: + id: 01KHJQVJJKY9KP5445WZFW7M4X + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:48.563627+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + created: 2026-02-16 08:07:48.564392+00:00 + - role: user + content: Turn on the living room light + attachments: null + created: 2026-02-16 08:07:48.563696+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:48.564401+00:00 + duration_ms: 90.455 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-0.yaml new file mode 100644 index 000000000..2f148ac1e --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-0.yaml @@ -0,0 +1,118 @@ +--- +uuid: 4abd184e-4658-4936-97b0-cdaa33d6ffb2 +task_id: dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-0 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add clean the kitchen to my todo list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add clean the kitchen to my todo list + context: + id: 01KHJQVRNK9BTKJT2519N5DGTK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:54.803636+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Personal Tasks + domain: todo + created: 2026-02-16 08:07:54.804289+00:00 + - role: user + content: Add clean the kitchen to my todo list + attachments: null + created: 2026-02-16 08:07:54.803709+00:00 + tools: + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:54.804298+00:00 + duration_ms: 96.705 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-1.yaml new file mode 100644 index 000000000..f21c41d4a --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-1.yaml @@ -0,0 +1,118 @@ +--- +uuid: 61d6a4ea-c98f-4b72-a563-9a3c67f479fc +task_id: dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-1 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add clean the kitchen to my todo list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add clean the kitchen to my todo list + context: + id: 01KHJQVRXJYVTRBTRPN6PWQSJ3 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:55.058887+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Personal Tasks + domain: todo + created: 2026-02-16 08:07:55.059556+00:00 + - role: user + content: Add clean the kitchen to my todo list + attachments: null + created: 2026-02-16 08:07:55.058959+00:00 + tools: + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:55.059566+00:00 + duration_ms: 103.794 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-2.yaml new file mode 100644 index 000000000..4f631bebd --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-2.yaml @@ -0,0 +1,118 @@ +--- +uuid: 8989bdc4-c314-45da-9824-6afca880ee48 +task_id: dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-2 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add clean the kitchen to my todo list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add clean the kitchen to my todo list + context: + id: 01KHJQVS50BXS34H0257DDSMHJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:55.296912+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Personal Tasks + domain: todo + created: 2026-02-16 08:07:55.297532+00:00 + - role: user + content: Add clean the kitchen to my todo list + attachments: null + created: 2026-02-16 08:07:55.296987+00:00 + tools: + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:55.297542+00:00 + duration_ms: 23.544 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-3.yaml new file mode 100644 index 000000000..b742658bc --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-3.yaml @@ -0,0 +1,118 @@ +--- +uuid: e3d17307-5084-472c-95e8-bbe005002118 +task_id: dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-3 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add clean the kitchen to my todo list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add clean the kitchen to my todo list + context: + id: 01KHJQVSA762N57F6BEAA2PY0G + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:55.463690+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Personal Tasks + domain: todo + created: 2026-02-16 08:07:55.464348+00:00 + - role: user + content: Add clean the kitchen to my todo list + attachments: null + created: 2026-02-16 08:07:55.463763+00:00 + tools: + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:55.464358+00:00 + duration_ms: 23.312 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-0.yaml new file mode 100644 index 000000000..5880c3c26 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-0.yaml @@ -0,0 +1,118 @@ +--- +uuid: 0ff04676-9bb2-40a4-9522-f12df4f3b375 +task_id: dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-0 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my personal tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my personal tasks + context: + id: 01KHJQVQ282FRS525EYTBE3P0E + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:53.160348+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Personal Tasks + domain: todo + created: 2026-02-16 08:07:53.160972+00:00 + - role: user + content: Add history homework to my personal tasks + attachments: null + created: 2026-02-16 08:07:53.160428+00:00 + tools: + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:53.160980+00:00 + duration_ms: 21.265 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-1.yaml new file mode 100644 index 000000000..797cac321 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-1.yaml @@ -0,0 +1,118 @@ +--- +uuid: 5aeaf8e3-9bb5-4b7c-b311-998077d9bded +task_id: dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-1 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my personal tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my personal tasks + context: + id: 01KHJQVQ7BGE6P35AWJKF2NGCZ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:53.323317+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Personal Tasks + domain: todo + created: 2026-02-16 08:07:53.323923+00:00 + - role: user + content: Add history homework to my personal tasks + attachments: null + created: 2026-02-16 08:07:53.323388+00:00 + tools: + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:53.323932+00:00 + duration_ms: 93.679 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-2.yaml new file mode 100644 index 000000000..651c368e2 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-2.yaml @@ -0,0 +1,118 @@ +--- +uuid: 9cae7042-1e50-421d-bb6d-306326b6b3f0 +task_id: dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-2 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my personal tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my personal tasks + context: + id: 01KHJQVQES2WZ08QVFT2B0P5K7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:53.561242+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Personal Tasks + domain: todo + created: 2026-02-16 08:07:53.561865+00:00 + - role: user + content: Add history homework to my personal tasks + attachments: null + created: 2026-02-16 08:07:53.561317+00:00 + tools: + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:53.561874+00:00 + duration_ms: 23.444 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-3.yaml new file mode 100644 index 000000000..8cce8107e --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-3.yaml @@ -0,0 +1,118 @@ +--- +uuid: 3b83825a-c0ef-4dee-affa-e778d8959b11 +task_id: dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-3 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my personal tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my personal tasks + context: + id: 01KHJQVQMG3WTPW9QV07BHHTX2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:53.744996+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Personal Tasks + domain: todo + created: 2026-02-16 08:07:53.745646+00:00 + - role: user + content: Add history homework to my personal tasks + attachments: null + created: 2026-02-16 08:07:53.745070+00:00 + tools: + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:53.745656+00:00 + duration_ms: 88.285 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_tasks-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_tasks-0.yaml new file mode 100644 index 000000000..50271344a --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_tasks-0.yaml @@ -0,0 +1,118 @@ +--- +uuid: 4fe4c871-988e-4268-9fb8-5e9a82cc0ad7 +task_id: dom1_pl_todo_todo-add_history_homework_to_my_tasks-0 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my tasks + context: + id: 01KHJQVQVX77P0AGY6D4VSFVP8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:53.982005+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Personal Tasks + domain: todo + created: 2026-02-16 08:07:53.982636+00:00 + - role: user + content: Add history homework to my tasks + attachments: null + created: 2026-02-16 08:07:53.982106+00:00 + tools: + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:53.982646+00:00 + duration_ms: 27.846 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_tasks-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_tasks-1.yaml new file mode 100644 index 000000000..7b4833b42 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_tasks-1.yaml @@ -0,0 +1,118 @@ +--- +uuid: 7b84fbcc-d595-4f2a-ac51-0ff84ec2b138 +task_id: dom1_pl_todo_todo-add_history_homework_to_my_tasks-1 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my tasks + context: + id: 01KHJQVR21ZVS6JGJPJF7N02E3 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:54.177868+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Personal Tasks + domain: todo + created: 2026-02-16 08:07:54.178525+00:00 + - role: user + content: Add history homework to my tasks + attachments: null + created: 2026-02-16 08:07:54.177942+00:00 + tools: + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:54.178534+00:00 + duration_ms: 28.147 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_tasks-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_tasks-2.yaml new file mode 100644 index 000000000..9e2f47f7e --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_tasks-2.yaml @@ -0,0 +1,118 @@ +--- +uuid: eaa25039-46b5-4668-bf9d-a66687433c4d +task_id: dom1_pl_todo_todo-add_history_homework_to_my_tasks-2 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my tasks + context: + id: 01KHJQVR7QH5VMM067F0JSKPVM + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:54.359343+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Personal Tasks + domain: todo + created: 2026-02-16 08:07:54.359927+00:00 + - role: user + content: Add history homework to my tasks + attachments: null + created: 2026-02-16 08:07:54.359414+00:00 + tools: + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:54.359936+00:00 + duration_ms: 21.841 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_tasks-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_tasks-3.yaml new file mode 100644 index 000000000..5cf25bec3 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-add_history_homework_to_my_tasks-3.yaml @@ -0,0 +1,118 @@ +--- +uuid: 3b0bdb15-bc4f-4a4e-a4a0-707c42f3e819 +task_id: dom1_pl_todo_todo-add_history_homework_to_my_tasks-3 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my tasks + context: + id: 01KHJQVRDT004042PKTP4W9ZF0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:54.554188+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Personal Tasks + domain: todo + created: 2026-02-16 08:07:54.554803+00:00 + - role: user + content: Add history homework to my tasks + attachments: null + created: 2026-02-16 08:07:54.554262+00:00 + tools: + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:54.554812+00:00 + duration_ms: 80.671 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-0.yaml new file mode 100644 index 000000000..04e3b7f8d --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-0.yaml @@ -0,0 +1,118 @@ +--- +uuid: a33b7c69-c880-4264-ae13-0b17f4dd00b4 +task_id: dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-0 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put history homework on personal tasks todo list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put history homework on personal tasks todo list + context: + id: 01KHJQVP5K3DZ0XXMTTNRTYR3V + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:52.243169+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Personal Tasks + domain: todo + created: 2026-02-16 08:07:52.243756+00:00 + - role: user + content: Put history homework on personal tasks todo list + attachments: null + created: 2026-02-16 08:07:52.243241+00:00 + tools: + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:52.243765+00:00 + duration_ms: 21.793 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-1.yaml new file mode 100644 index 000000000..c9ccf4571 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-1.yaml @@ -0,0 +1,118 @@ +--- +uuid: 5b6f7908-0b7e-4369-a8d3-1cd4247ec3dd +task_id: dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-1 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put history homework on personal tasks todo list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put history homework on personal tasks todo list + context: + id: 01KHJQVPAG2NC276AN8EDWCJMM + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:52.400890+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Personal Tasks + domain: todo + created: 2026-02-16 08:07:52.401526+00:00 + - role: user + content: Put history homework on personal tasks todo list + attachments: null + created: 2026-02-16 08:07:52.400960+00:00 + tools: + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:52.401536+00:00 + duration_ms: 92.982 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-2.yaml new file mode 100644 index 000000000..77ae611ca --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-2.yaml @@ -0,0 +1,118 @@ +--- +uuid: 9d8c8a86-61d4-4a4d-9ced-de02c02ed69d +task_id: dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-2 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put history homework on personal tasks todo list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put history homework on personal tasks todo list + context: + id: 01KHJQVPK4NZ0DTZEGTZJ5A1NH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:52.676420+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Personal Tasks + domain: todo + created: 2026-02-16 08:07:52.677005+00:00 + - role: user + content: Put history homework on personal tasks todo list + attachments: null + created: 2026-02-16 08:07:52.676489+00:00 + tools: + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:52.677014+00:00 + duration_ms: 99.819 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-3.yaml new file mode 100644 index 000000000..4c78f4b69 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-3.yaml @@ -0,0 +1,118 @@ +--- +uuid: 287d7483-1f85-4d74-8fc1-a7232bfc65b8 +task_id: dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-3 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put history homework on personal tasks todo list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put history homework on personal tasks todo list + context: + id: 01KHJQVPVFD3WYX8J67JQB17G2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:52.943608+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Personal Tasks + domain: todo + created: 2026-02-16 08:07:52.944264+00:00 + - role: user + content: Put history homework on personal tasks todo list + attachments: null + created: 2026-02-16 08:07:52.943679+00:00 + tools: + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:52.944273+00:00 + duration_ms: 85.376 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-close_the_garage_door-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-close_the_garage_door-0.yaml new file mode 100644 index 000000000..c0d516792 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-close_the_garage_door-0.yaml @@ -0,0 +1,134 @@ +--- +uuid: 5ca6f674-9f6e-4fef-93d9-d53055f205ee +task_id: home1_us_cover_garage_cover_garage-close_the_garage_door-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the garage door + expect_changes: + cover.garage_door_opener: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the garage door + context: + id: 01KHJQVSFRV9KFVTEE7W07DT5W + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:55.640272+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + created: 2026-02-16 08:07:55.642138+00:00 + - role: user + content: Close the garage door + attachments: null + created: 2026-02-16 08:07:55.640342+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:55.642149+00:00 + duration_ms: 25.959 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-close_the_garage_door-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-close_the_garage_door-1.yaml new file mode 100644 index 000000000..5acb1449f --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-close_the_garage_door-1.yaml @@ -0,0 +1,134 @@ +--- +uuid: e860ddb5-07b0-47ee-9e03-1ad2eaaa2527 +task_id: home1_us_cover_garage_cover_garage-close_the_garage_door-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the garage door + expect_changes: + cover.garage_door_opener: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the garage door + context: + id: 01KHJQVSNF6ZSX785TNMYNVZ7Q + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:55.823849+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + created: 2026-02-16 08:07:55.828693+00:00 + - role: user + content: Close the garage door + attachments: null + created: 2026-02-16 08:07:55.823920+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:55.828704+00:00 + duration_ms: 30.554 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-close_the_garage_door-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-close_the_garage_door-2.yaml new file mode 100644 index 000000000..074d0ad16 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-close_the_garage_door-2.yaml @@ -0,0 +1,134 @@ +--- +uuid: 9e16a25a-212e-46f3-8567-d32ebe0c4505 +task_id: home1_us_cover_garage_cover_garage-close_the_garage_door-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the garage door + expect_changes: + cover.garage_door_opener: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the garage door + context: + id: 01KHJQVSVG0XSZWVRGANKCZ6GW + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:56.016843+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + created: 2026-02-16 08:07:56.020064+00:00 + - role: user + content: Close the garage door + attachments: null + created: 2026-02-16 08:07:56.016915+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:56.020092+00:00 + duration_ms: 29.857 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-close_the_garage_door-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-close_the_garage_door-3.yaml new file mode 100644 index 000000000..4c7bed716 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-close_the_garage_door-3.yaml @@ -0,0 +1,134 @@ +--- +uuid: 4f417a57-1c6e-4bb7-a3c6-f4fb1df560a7 +task_id: home1_us_cover_garage_cover_garage-close_the_garage_door-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the garage door + expect_changes: + cover.garage_door_opener: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the garage door + context: + id: 01KHJQVT11DAGA6TP56AD5GQZD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:56.193474+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + created: 2026-02-16 08:07:56.196771+00:00 + - role: user + content: Close the garage door + attachments: null + created: 2026-02-16 08:07:56.193542+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:56.196782+00:00 + duration_ms: 26.539 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-open_the_garage_door-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-open_the_garage_door-0.yaml new file mode 100644 index 000000000..52eb869b0 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-open_the_garage_door-0.yaml @@ -0,0 +1,134 @@ +--- +uuid: 4311093e-ad46-4299-860c-3dc509c84408 +task_id: home1_us_cover_garage_cover_garage-open_the_garage_door-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the garage door + context: + id: 01KHJQVVVHJ0C3735SM0A5NWF7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:58.065171+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + created: 2026-02-16 08:07:58.065932+00:00 + - role: user + content: Open the garage door + attachments: null + created: 2026-02-16 08:07:58.065244+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:58.065941+00:00 + duration_ms: 23.184 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-open_the_garage_door-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-open_the_garage_door-1.yaml new file mode 100644 index 000000000..b96aa7a98 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-open_the_garage_door-1.yaml @@ -0,0 +1,134 @@ +--- +uuid: 7da79143-bea5-478b-bce7-f4f5d165c45d +task_id: home1_us_cover_garage_cover_garage-open_the_garage_door-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the garage door + context: + id: 01KHJQVW142PFQ7GW8HJGQR0W1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:58.244906+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + created: 2026-02-16 08:07:58.246177+00:00 + - role: user + content: Open the garage door + attachments: null + created: 2026-02-16 08:07:58.244977+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:58.246187+00:00 + duration_ms: 25.955 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-open_the_garage_door-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-open_the_garage_door-2.yaml new file mode 100644 index 000000000..1078bdcea --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-open_the_garage_door-2.yaml @@ -0,0 +1,134 @@ +--- +uuid: e909cc4b-1673-4827-ad53-9c39707a12a0 +task_id: home1_us_cover_garage_cover_garage-open_the_garage_door-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the garage door + context: + id: 01KHJQVW6PB0FY1ARDPZ5CG2C4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:58.422144+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + created: 2026-02-16 08:07:58.424394+00:00 + - role: user + content: Open the garage door + attachments: null + created: 2026-02-16 08:07:58.422221+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:58.424407+00:00 + duration_ms: 26.961 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-open_the_garage_door-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-open_the_garage_door-3.yaml new file mode 100644 index 000000000..3a3ca0594 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-open_the_garage_door-3.yaml @@ -0,0 +1,134 @@ +--- +uuid: b7207a6c-ed45-41f2-9643-8194cc554e36 +task_id: home1_us_cover_garage_cover_garage-open_the_garage_door-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the garage door + context: + id: 01KHJQVWEGJFT2RDZ5N8VYTAA7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:58.672582+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + created: 2026-02-16 08:07:58.673394+00:00 + - role: user + content: Open the garage door + attachments: null + created: 2026-02-16 08:07:58.672652+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:58.673402+00:00 + duration_ms: 106.187 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_close_the_garage_door-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_close_the_garage_door-0.yaml new file mode 100644 index 000000000..41f3e0eee --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_close_the_garage_door-0.yaml @@ -0,0 +1,134 @@ +--- +uuid: 73f3c2a5-f597-44ff-9631-a0dc2370120f +task_id: home1_us_cover_garage_cover_garage-please_close_the_garage_door-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Please close the garage door + expect_changes: + cover.garage_door_opener: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Please close the garage door + context: + id: 01KHJQVT6F4V7NWFT2PSAKXKYF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:56.367540+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + created: 2026-02-16 08:07:56.369439+00:00 + - role: user + content: Please close the garage door + attachments: null + created: 2026-02-16 08:07:56.367616+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:56.369449+00:00 + duration_ms: 24.479 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_close_the_garage_door-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_close_the_garage_door-1.yaml new file mode 100644 index 000000000..7ad42ae21 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_close_the_garage_door-1.yaml @@ -0,0 +1,134 @@ +--- +uuid: 2dd155be-61d1-4c61-85bd-58a1014c0e42 +task_id: home1_us_cover_garage_cover_garage-please_close_the_garage_door-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Please close the garage door + expect_changes: + cover.garage_door_opener: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Please close the garage door + context: + id: 01KHJQVTCF3XB5PBKTKGXY3H41 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:56.559398+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + created: 2026-02-16 08:07:56.562026+00:00 + - role: user + content: Please close the garage door + attachments: null + created: 2026-02-16 08:07:56.559468+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:56.562036+00:00 + duration_ms: 24.072 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_close_the_garage_door-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_close_the_garage_door-2.yaml new file mode 100644 index 000000000..32ef13e4f --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_close_the_garage_door-2.yaml @@ -0,0 +1,134 @@ +--- +uuid: 8a21ef0e-ae4c-4d1c-abbc-3eddad3c0f3b +task_id: home1_us_cover_garage_cover_garage-please_close_the_garage_door-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Please close the garage door + expect_changes: + cover.garage_door_opener: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Please close the garage door + context: + id: 01KHJQVTK3NCG3XG3VJ38VB931 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:56.771344+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + created: 2026-02-16 08:07:56.772146+00:00 + - role: user + content: Please close the garage door + attachments: null + created: 2026-02-16 08:07:56.771415+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:56.772154+00:00 + duration_ms: 27.138 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_close_the_garage_door-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_close_the_garage_door-3.yaml new file mode 100644 index 000000000..db755cc05 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_close_the_garage_door-3.yaml @@ -0,0 +1,134 @@ +--- +uuid: 0a81dd47-3de0-4f2b-a6ef-bc192704a18a +task_id: home1_us_cover_garage_cover_garage-please_close_the_garage_door-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Please close the garage door + expect_changes: + cover.garage_door_opener: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Please close the garage door + context: + id: 01KHJQVTRDGJ6XH32PE9RMNKT8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:56.941176+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + created: 2026-02-16 08:07:56.941964+00:00 + - role: user + content: Please close the garage door + attachments: null + created: 2026-02-16 08:07:56.941251+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:56.941973+00:00 + duration_ms: 79.175 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_open_the_garage_door-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_open_the_garage_door-0.yaml new file mode 100644 index 000000000..2542ba5f8 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_open_the_garage_door-0.yaml @@ -0,0 +1,134 @@ +--- +uuid: 50265b8f-662a-4640-8711-5a29aec094cc +task_id: home1_us_cover_garage_cover_garage-please_open_the_garage_door-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Please open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Please open the garage door + context: + id: 01KHJQVTZNCKYBJ5HB1SQ0Q6E9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:57.173592+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + created: 2026-02-16 08:07:57.174828+00:00 + - role: user + content: Please open the garage door + attachments: null + created: 2026-02-16 08:07:57.173661+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:57.174838+00:00 + duration_ms: 29.751 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_open_the_garage_door-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_open_the_garage_door-1.yaml new file mode 100644 index 000000000..6b410a3de --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_open_the_garage_door-1.yaml @@ -0,0 +1,134 @@ +--- +uuid: a06851bc-60e7-4160-a190-f4c6b616ffc2 +task_id: home1_us_cover_garage_cover_garage-please_open_the_garage_door-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Please open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Please open the garage door + context: + id: 01KHJQVV5S419PYD6NTEPQ2XJP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:57.369686+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + created: 2026-02-16 08:07:57.373170+00:00 + - role: user + content: Please open the garage door + attachments: null + created: 2026-02-16 08:07:57.369759+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:57.373181+00:00 + duration_ms: 102.314 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_open_the_garage_door-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_open_the_garage_door-2.yaml new file mode 100644 index 000000000..447548f1e --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_open_the_garage_door-2.yaml @@ -0,0 +1,134 @@ +--- +uuid: b2f30c52-de64-4aa6-a4b7-263b30dccee3 +task_id: home1_us_cover_garage_cover_garage-please_open_the_garage_door-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Please open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Please open the garage door + context: + id: 01KHJQVVE5WE2PFTY2SZQ8QEG1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:57.637860+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + created: 2026-02-16 08:07:57.642381+00:00 + - role: user + content: Please open the garage door + attachments: null + created: 2026-02-16 08:07:57.637934+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:57.642394+00:00 + duration_ms: 99.544 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_open_the_garage_door-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_open_the_garage_door-3.yaml new file mode 100644 index 000000000..0a9e06179 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_cover_garage_cover_garage-please_open_the_garage_door-3.yaml @@ -0,0 +1,134 @@ +--- +uuid: e0360b19-51c3-463a-9f80-0496f08029ba +task_id: home1_us_cover_garage_cover_garage-please_open_the_garage_door-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Please open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Please open the garage door + context: + id: 01KHJQVVNZMK2A4XRHYRBE4X17 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:57.887237+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + created: 2026-02-16 08:07:57.891411+00:00 + - role: user + content: Please open the garage door + attachments: null + created: 2026-02-16 08:07:57.887310+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:57.891422+00:00 + duration_ms: 26.419 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_smart_lock-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_smart_lock-0.yaml new file mode 100644 index 000000000..abda95bfb --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_smart_lock-0.yaml @@ -0,0 +1,117 @@ +--- +uuid: 5550fab5-041b-45b0-83e0-9763618b1cbd +task_id: home1_us_lock_smart_lock-lock_smart_lock-0 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock smart lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock smart lock + context: + id: 01KHJQVX0NY01CY736FQN4K1YY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:59.253523+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + created: 2026-02-16 08:07:59.254222+00:00 + - role: user + content: Lock smart lock + attachments: null + created: 2026-02-16 08:07:59.253597+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:59.254231+00:00 + duration_ms: 36.009 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_smart_lock-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_smart_lock-1.yaml new file mode 100644 index 000000000..37a4b7b1a --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_smart_lock-1.yaml @@ -0,0 +1,117 @@ +--- +uuid: cb00a0b6-5652-4fe3-b10f-c7814dca1cdb +task_id: home1_us_lock_smart_lock-lock_smart_lock-1 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock smart lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock smart lock + context: + id: 01KHJQVX6ESV684N3QQPBGRFAV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:59.438454+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + created: 2026-02-16 08:07:59.439096+00:00 + - role: user + content: Lock smart lock + attachments: null + created: 2026-02-16 08:07:59.438527+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:59.439105+00:00 + duration_ms: 29.081 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_smart_lock-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_smart_lock-2.yaml new file mode 100644 index 000000000..69e2e672c --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_smart_lock-2.yaml @@ -0,0 +1,117 @@ +--- +uuid: a67969e9-a6b2-434e-8855-2a5691997a99 +task_id: home1_us_lock_smart_lock-lock_smart_lock-2 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock smart lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock smart lock + context: + id: 01KHJQVXCJ7V78V781VA22A8T6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:59.634536+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + created: 2026-02-16 08:07:59.635185+00:00 + - role: user + content: Lock smart lock + attachments: null + created: 2026-02-16 08:07:59.634611+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:59.635194+00:00 + duration_ms: 20.731 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_smart_lock-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_smart_lock-3.yaml new file mode 100644 index 000000000..4ea2b4edb --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_smart_lock-3.yaml @@ -0,0 +1,117 @@ +--- +uuid: 09bc344a-fb39-40d3-a7ff-feb5db54008f +task_id: home1_us_lock_smart_lock-lock_smart_lock-3 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock smart lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock smart lock + context: + id: 01KHJQVXHPZ8BSHDV17HQ300F9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:59.798410+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + created: 2026-02-16 08:07:59.799038+00:00 + - role: user + content: Lock smart lock + attachments: null + created: 2026-02-16 08:07:59.798483+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:59.799048+00:00 + duration_ms: 20.951 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_the_entry_lock-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_the_entry_lock-0.yaml new file mode 100644 index 000000000..6c37074c7 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_the_entry_lock-0.yaml @@ -0,0 +1,117 @@ +--- +uuid: 92afa315-5ad7-4ee5-8447-4b80428893d0 +task_id: home1_us_lock_smart_lock-lock_the_entry_lock-0 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the entry lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the entry lock + context: + id: 01KHJQVXQCTPPW1SE7R4TPMZTG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:59.980633+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + created: 2026-02-16 08:07:59.981260+00:00 + - role: user + content: Lock the entry lock + attachments: null + created: 2026-02-16 08:07:59.980704+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:59.981269+00:00 + duration_ms: 26.128 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_the_entry_lock-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_the_entry_lock-1.yaml new file mode 100644 index 000000000..9460d78ef --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_the_entry_lock-1.yaml @@ -0,0 +1,117 @@ +--- +uuid: fae3e50a-5832-474d-8f35-6982b9a0c2f4 +task_id: home1_us_lock_smart_lock-lock_the_entry_lock-1 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the entry lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the entry lock + context: + id: 01KHJQVXXJVGM8PH5DV84RXXVD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:00.179011+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + created: 2026-02-16 08:08:00.179641+00:00 + - role: user + content: Lock the entry lock + attachments: null + created: 2026-02-16 08:08:00.179116+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:00.179650+00:00 + duration_ms: 27.078 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_the_entry_lock-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_the_entry_lock-2.yaml new file mode 100644 index 000000000..e1fde8689 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_the_entry_lock-2.yaml @@ -0,0 +1,117 @@ +--- +uuid: 6e7ad79c-73e1-4348-9cc3-2864c7e10afa +task_id: home1_us_lock_smart_lock-lock_the_entry_lock-2 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the entry lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the entry lock + context: + id: 01KHJQVY3GDPHVA0FGJZ3590M2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:00.368215+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + created: 2026-02-16 08:08:00.368832+00:00 + - role: user + content: Lock the entry lock + attachments: null + created: 2026-02-16 08:08:00.368290+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:00.368840+00:00 + duration_ms: 25.775 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_the_entry_lock-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_the_entry_lock-3.yaml new file mode 100644 index 000000000..cf7f4cb17 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-lock_the_entry_lock-3.yaml @@ -0,0 +1,117 @@ +--- +uuid: 45575e6b-1c7a-495f-a290-d17c5109ed71 +task_id: home1_us_lock_smart_lock-lock_the_entry_lock-3 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the entry lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the entry lock + context: + id: 01KHJQVY9S2KNN2Q2VN6VS1ZKH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:00.569936+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + created: 2026-02-16 08:08:00.570571+00:00 + - role: user + content: Lock the entry lock + attachments: null + created: 2026-02-16 08:08:00.570008+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:00.570580+00:00 + duration_ms: 22.816 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_smart_lock-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_smart_lock-0.yaml new file mode 100644 index 000000000..d288563c1 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_smart_lock-0.yaml @@ -0,0 +1,117 @@ +--- +uuid: d4420365-44fb-4106-b5a9-7b23324b610d +task_id: home1_us_lock_smart_lock-unlock_smart_lock-0 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock smart lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock smart lock + context: + id: 01KHJQVYFBPW0MGRDN8GFYV31T + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:00.747804+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + created: 2026-02-16 08:08:00.748465+00:00 + - role: user + content: Unlock smart lock + attachments: null + created: 2026-02-16 08:08:00.747880+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:00.748475+00:00 + duration_ms: 24.445 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_smart_lock-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_smart_lock-1.yaml new file mode 100644 index 000000000..a8a6dd2fb --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_smart_lock-1.yaml @@ -0,0 +1,117 @@ +--- +uuid: e5876b56-75ab-434c-953c-41f607d2835f +task_id: home1_us_lock_smart_lock-unlock_smart_lock-1 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock smart lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock smart lock + context: + id: 01KHJQVYPC9N2YD0STV2PTFG1E + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:00.972269+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + created: 2026-02-16 08:08:00.972929+00:00 + - role: user + content: Unlock smart lock + attachments: null + created: 2026-02-16 08:08:00.972345+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:00.972938+00:00 + duration_ms: 20.643 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_smart_lock-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_smart_lock-2.yaml new file mode 100644 index 000000000..9527c6b04 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_smart_lock-2.yaml @@ -0,0 +1,117 @@ +--- +uuid: be84d008-f8b1-421d-b4fb-461346abcd4d +task_id: home1_us_lock_smart_lock-unlock_smart_lock-2 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock smart lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock smart lock + context: + id: 01KHJQVYVJF0GDMXRH2BZ1TNPG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:01.138145+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + created: 2026-02-16 08:08:01.138757+00:00 + - role: user + content: Unlock smart lock + attachments: null + created: 2026-02-16 08:08:01.138217+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:01.138765+00:00 + duration_ms: 94.281 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_smart_lock-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_smart_lock-3.yaml new file mode 100644 index 000000000..4a099fc5d --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_smart_lock-3.yaml @@ -0,0 +1,117 @@ +--- +uuid: af29ef05-de51-46cf-b05a-9fabd97edb46 +task_id: home1_us_lock_smart_lock-unlock_smart_lock-3 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock smart lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock smart lock + context: + id: 01KHJQVZ2V2YD12F8H9BYPCS5E + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:01.372112+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + created: 2026-02-16 08:08:01.372758+00:00 + - role: user + content: Unlock smart lock + attachments: null + created: 2026-02-16 08:08:01.372201+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:01.372767+00:00 + duration_ms: 20.506 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_the_entry_lock-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_the_entry_lock-0.yaml new file mode 100644 index 000000000..fe013c65f --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_the_entry_lock-0.yaml @@ -0,0 +1,117 @@ +--- +uuid: eb59a9e1-d37a-4835-8260-22a26ae0e903 +task_id: home1_us_lock_smart_lock-unlock_the_entry_lock-0 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock the entry lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock the entry lock + context: + id: 01KHJQVZ89D4WBB6T0R2S0704D + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:01.545296+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + created: 2026-02-16 08:08:01.545920+00:00 + - role: user + content: Unlock the entry lock + attachments: null + created: 2026-02-16 08:08:01.545369+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:01.545928+00:00 + duration_ms: 83.237 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_the_entry_lock-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_the_entry_lock-1.yaml new file mode 100644 index 000000000..5f4c12553 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_the_entry_lock-1.yaml @@ -0,0 +1,117 @@ +--- +uuid: a3f4b5dd-f45d-47f9-abf7-eb59b23b616e +task_id: home1_us_lock_smart_lock-unlock_the_entry_lock-1 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock the entry lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock the entry lock + context: + id: 01KHJQW097YP76TSP8EFA4QJJ4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:02.599624+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + created: 2026-02-16 08:08:02.600254+00:00 + - role: user + content: Unlock the entry lock + attachments: null + created: 2026-02-16 08:08:02.599694+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:02.600263+00:00 + duration_ms: 109.431 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_the_entry_lock-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_the_entry_lock-2.yaml new file mode 100644 index 000000000..22c4d6830 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_the_entry_lock-2.yaml @@ -0,0 +1,117 @@ +--- +uuid: 701ce1c0-f906-4e0f-8a25-1ae4abcc9743 +task_id: home1_us_lock_smart_lock-unlock_the_entry_lock-2 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock the entry lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock the entry lock + context: + id: 01KHJQW0HX6N7272SBND204H7Q + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:02.877900+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + created: 2026-02-16 08:08:02.878514+00:00 + - role: user + content: Unlock the entry lock + attachments: null + created: 2026-02-16 08:08:02.877970+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:02.878523+00:00 + duration_ms: 22.876 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_the_entry_lock-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_the_entry_lock-3.yaml new file mode 100644 index 000000000..75f2e3192 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_lock_smart_lock-unlock_the_entry_lock-3.yaml @@ -0,0 +1,117 @@ +--- +uuid: 87dd6be8-c60f-455e-afc8-b634d938d581 +task_id: home1_us_lock_smart_lock-unlock_the_entry_lock-3 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock the entry lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock the entry lock + context: + id: 01KHJQW0QK7NB115YPYZ2BQP2Q + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:03.059434+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + created: 2026-02-16 08:08:03.060019+00:00 + - role: user + content: Unlock the entry lock + attachments: null + created: 2026-02-16 08:08:03.059504+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:03.060027+00:00 + duration_ms: 22.107 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-0.yaml new file mode 100644 index 000000000..cf19465bb --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-0.yaml @@ -0,0 +1,115 @@ +--- +uuid: 56930756-51fb-45ef-a268-d25dd79989dc +task_id: home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Return Roborock Downstairs to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Return Roborock Downstairs to base + context: + id: 01KHJQW2FPR9HS2G2Q4K81NT2J + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:04.854384+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + created: 2026-02-16 08:08:04.855013+00:00 + - role: user + content: Return Roborock Downstairs to base + attachments: null + created: 2026-02-16 08:08:04.854453+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:04.855021+00:00 + duration_ms: 23.472 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-1.yaml new file mode 100644 index 000000000..2ade19d2e --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-1.yaml @@ -0,0 +1,115 @@ +--- +uuid: d184f57e-5ad9-40b2-9a19-1973a5c8d254 +task_id: home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Return Roborock Downstairs to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Return Roborock Downstairs to base + context: + id: 01KHJQW2N02NRY30F9JT28PYTK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:05.024747+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + created: 2026-02-16 08:08:05.025390+00:00 + - role: user + content: Return Roborock Downstairs to base + attachments: null + created: 2026-02-16 08:08:05.024817+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:05.025399+00:00 + duration_ms: 24.327 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-2.yaml new file mode 100644 index 000000000..1b1259176 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-2.yaml @@ -0,0 +1,115 @@ +--- +uuid: 7188643d-d780-4f1f-acab-9c2586888b67 +task_id: home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Return Roborock Downstairs to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Return Roborock Downstairs to base + context: + id: 01KHJQW2TQHZSNKQ59E3AKZ0JY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:05.207654+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + created: 2026-02-16 08:08:05.208375+00:00 + - role: user + content: Return Roborock Downstairs to base + attachments: null + created: 2026-02-16 08:08:05.207727+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:05.208386+00:00 + duration_ms: 29.667 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-3.yaml new file mode 100644 index 000000000..76e17024f --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-3.yaml @@ -0,0 +1,115 @@ +--- +uuid: a3734d54-6a07-4875-86c0-0a389e7cea2d +task_id: home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Return Roborock Downstairs to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Return Roborock Downstairs to base + context: + id: 01KHJQW304478MC3YNJHDVBN7X + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:05.380208+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + created: 2026-02-16 08:08:05.380843+00:00 + - role: user + content: Return Roborock Downstairs to base + attachments: null + created: 2026-02-16 08:08:05.380279+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:05.380852+00:00 + duration_ms: 22.935 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-0.yaml new file mode 100644 index 000000000..553da0262 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-0.yaml @@ -0,0 +1,115 @@ +--- +uuid: ac9d3647-2b91-4d65-8b9e-888689ac7a2d +task_id: home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Return vacuum in the living room to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Return vacuum in the living room to base + context: + id: 01KHJQW35XYN803C6J0E3AB5BY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:05.565500+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + created: 2026-02-16 08:08:05.566131+00:00 + - role: user + content: Return vacuum in the living room to base + attachments: null + created: 2026-02-16 08:08:05.565569+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:05.566140+00:00 + duration_ms: 22.086 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-1.yaml new file mode 100644 index 000000000..18cac951f --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-1.yaml @@ -0,0 +1,115 @@ +--- +uuid: 8b3f871d-a6ce-4d02-bab5-7cd910c552d3 +task_id: home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Return vacuum in the living room to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Return vacuum in the living room to base + context: + id: 01KHJQW3B1021YQT9RS74ASHAP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:05.729439+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + created: 2026-02-16 08:08:05.730035+00:00 + - role: user + content: Return vacuum in the living room to base + attachments: null + created: 2026-02-16 08:08:05.729508+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:05.730044+00:00 + duration_ms: 23.131 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-2.yaml new file mode 100644 index 000000000..561f2cac8 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-2.yaml @@ -0,0 +1,115 @@ +--- +uuid: ea8a445a-387b-4613-9cc6-f7b93ad43d2e +task_id: home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Return vacuum in the living room to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Return vacuum in the living room to base + context: + id: 01KHJQW3GKB28F8QBCZYNPNG2H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:05.907694+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + created: 2026-02-16 08:08:05.908378+00:00 + - role: user + content: Return vacuum in the living room to base + attachments: null + created: 2026-02-16 08:08:05.907765+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:05.908387+00:00 + duration_ms: 23.384 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-3.yaml new file mode 100644 index 000000000..e283536bc --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-3.yaml @@ -0,0 +1,115 @@ +--- +uuid: e33582b4-ffd8-4ba1-aa93-9e211a243a0c +task_id: home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Return vacuum in the living room to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Return vacuum in the living room to base + context: + id: 01KHJQW3PAZGA4MMK53QD5YVKN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:06.090957+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + created: 2026-02-16 08:08:06.091625+00:00 + - role: user + content: Return vacuum in the living room to base + attachments: null + created: 2026-02-16 08:08:06.091029+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:06.091634+00:00 + duration_ms: 33.804 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-0.yaml new file mode 100644 index 000000000..5223acb63 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-0.yaml @@ -0,0 +1,115 @@ +--- +uuid: e29632f8-90a3-4535-947e-7c7c18fbd055 +task_id: home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock Downstairs vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock Downstairs vacuum + context: + id: 01KHJQW1K83H6Q7YE0ZJETR2X7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:03.944563+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + created: 2026-02-16 08:08:03.945208+00:00 + - role: user + content: Start Roborock Downstairs vacuum + attachments: null + created: 2026-02-16 08:08:03.944634+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:03.945217+00:00 + duration_ms: 34.68 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-1.yaml new file mode 100644 index 000000000..30bda1690 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-1.yaml @@ -0,0 +1,115 @@ +--- +uuid: 9adf3e7c-dd0c-48eb-b29d-1d4c7ff02162 +task_id: home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock Downstairs vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock Downstairs vacuum + context: + id: 01KHJQW1YTM0RWDDNNGZF1K5R5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:04.314962+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + created: 2026-02-16 08:08:04.315620+00:00 + - role: user + content: Start Roborock Downstairs vacuum + attachments: null + created: 2026-02-16 08:08:04.315031+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:04.315629+00:00 + duration_ms: 30.797 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-2.yaml new file mode 100644 index 000000000..318069b9d --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-2.yaml @@ -0,0 +1,115 @@ +--- +uuid: 3ab7f881-c008-43ce-ac7d-c96f9215c342 +task_id: home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock Downstairs vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock Downstairs vacuum + context: + id: 01KHJQW24B6K6A0T64NBZF8GYB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:04.491501+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + created: 2026-02-16 08:08:04.492136+00:00 + - role: user + content: Start Roborock Downstairs vacuum + attachments: null + created: 2026-02-16 08:08:04.491568+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:04.492145+00:00 + duration_ms: 26.407 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-3.yaml new file mode 100644 index 000000000..fb31111c0 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-3.yaml @@ -0,0 +1,115 @@ +--- +uuid: 9e9d1303-952a-4060-9189-9aa5db075792 +task_id: home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock Downstairs vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock Downstairs vacuum + context: + id: 01KHJQW2AT4KHDHWN842GJ3438 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:04.698516+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + created: 2026-02-16 08:08:04.699177+00:00 + - role: user + content: Start Roborock Downstairs vacuum + attachments: null + created: 2026-02-16 08:08:04.698587+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:04.699186+00:00 + duration_ms: 22.008 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-0.yaml new file mode 100644 index 000000000..04ee8ee07 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-0.yaml @@ -0,0 +1,115 @@ +--- +uuid: 7782d255-c02d-402d-aa65-01599d43ad14 +task_id: home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start vacuum in the living room + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start vacuum in the living room + context: + id: 01KHJQW0X1435VWHPZ5A027C0E + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:03.233219+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + created: 2026-02-16 08:08:03.233854+00:00 + - role: user + content: Start vacuum in the living room + attachments: null + created: 2026-02-16 08:08:03.233290+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:03.233862+00:00 + duration_ms: 27.489 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-1.yaml new file mode 100644 index 000000000..2afb7530b --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-1.yaml @@ -0,0 +1,115 @@ +--- +uuid: 14b0a438-3457-4172-8665-3678e055ac9b +task_id: home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start vacuum in the living room + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start vacuum in the living room + context: + id: 01KHJQW12CQ07BQQHDM0JAQYRN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:03.404602+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + created: 2026-02-16 08:08:03.405289+00:00 + - role: user + content: Start vacuum in the living room + attachments: null + created: 2026-02-16 08:08:03.404672+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:03.405298+00:00 + duration_ms: 25.11 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-2.yaml new file mode 100644 index 000000000..478aaf9ae --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-2.yaml @@ -0,0 +1,115 @@ +--- +uuid: a70a7025-3da0-4449-a04e-22d5dd8bb77a +task_id: home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start vacuum in the living room + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start vacuum in the living room + context: + id: 01KHJQW17MZNCGD4WV1Z1MFZ86 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:03.572833+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + created: 2026-02-16 08:08:03.573522+00:00 + - role: user + content: Start vacuum in the living room + attachments: null + created: 2026-02-16 08:08:03.572903+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:03.573532+00:00 + duration_ms: 22.595 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-3.yaml new file mode 100644 index 000000000..d8292bd96 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-3.yaml @@ -0,0 +1,115 @@ +--- +uuid: 3a35d53b-abd3-4872-bc30-4c7084ace3dc +task_id: home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start vacuum in the living room + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start vacuum in the living room + context: + id: 01KHJQW1DAY5VPK76KH6QK3MPN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:03.754484+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + created: 2026-02-16 08:08:03.755061+00:00 + - role: user + content: Start vacuum in the living room + attachments: null + created: 2026-02-16 08:08:03.754553+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:03.755070+00:00 + duration_ms: 24.28 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_front_yard_valve-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_front_yard_valve-0.yaml new file mode 100644 index 000000000..9e4747fcb --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_front_yard_valve-0.yaml @@ -0,0 +1,131 @@ +--- +uuid: 2a52e67a-3339-4da9-bc78-2fec5c51c555 +task_id: home2_ru_valve_water_valve-close_the_front_yard_valve-0 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the front yard valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the front yard valve + context: + id: 01KHJQW5MJ8C67R85N5FG202AQ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:08.083012+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:08.083804+00:00 + - role: user + content: close the front yard valve + attachments: null + created: 2026-02-16 08:08:08.083117+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:08.083814+00:00 + duration_ms: 21.035 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_front_yard_valve-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_front_yard_valve-1.yaml new file mode 100644 index 000000000..b46094976 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_front_yard_valve-1.yaml @@ -0,0 +1,131 @@ +--- +uuid: 54342469-ef73-43e2-b716-2a2b94165afd +task_id: home2_ru_valve_water_valve-close_the_front_yard_valve-1 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the front yard valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the front yard valve + context: + id: 01KHJQW5TJ715S6EX1AWQNGNG1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:08.274749+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:08.275519+00:00 + - role: user + content: close the front yard valve + attachments: null + created: 2026-02-16 08:08:08.274820+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:08.275529+00:00 + duration_ms: 27.531 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_front_yard_valve-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_front_yard_valve-2.yaml new file mode 100644 index 000000000..54e6b80e4 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_front_yard_valve-2.yaml @@ -0,0 +1,131 @@ +--- +uuid: 0d18391b-3647-4efb-8341-de900735e7d3 +task_id: home2_ru_valve_water_valve-close_the_front_yard_valve-2 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the front yard valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the front yard valve + context: + id: 01KHJQW60GM3GW8NPB5RNG0NHN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:08.464788+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:08.465566+00:00 + - role: user + content: close the front yard valve + attachments: null + created: 2026-02-16 08:08:08.464858+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:08.465576+00:00 + duration_ms: 25.972 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_front_yard_valve-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_front_yard_valve-3.yaml new file mode 100644 index 000000000..86850c0e7 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_front_yard_valve-3.yaml @@ -0,0 +1,131 @@ +--- +uuid: d4360570-b51f-4edb-bb4d-8736c39ba021 +task_id: home2_ru_valve_water_valve-close_the_front_yard_valve-3 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the front yard valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the front yard valve + context: + id: 01KHJQW67AZW6J7ZH04KK7WDT1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:08.682336+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:08.683140+00:00 + - role: user + content: close the front yard valve + attachments: null + created: 2026-02-16 08:08:08.682411+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:08.683150+00:00 + duration_ms: 24.388 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_irrigation_valve-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_irrigation_valve-0.yaml new file mode 100644 index 000000000..16fdc880b --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_irrigation_valve-0.yaml @@ -0,0 +1,131 @@ +--- +uuid: 16d3e62f-48bd-4ed5-8297-32b4a614d55e +task_id: home2_ru_valve_water_valve-close_the_irrigation_valve-0 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the irrigation valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the irrigation valve + context: + id: 01KHJQW6C9ZBHC3ZDQTS1T0KC5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:08.841136+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:08.841882+00:00 + - role: user + content: close the irrigation valve + attachments: null + created: 2026-02-16 08:08:08.841208+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:08.841892+00:00 + duration_ms: 21.906 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_irrigation_valve-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_irrigation_valve-1.yaml new file mode 100644 index 000000000..31067ff13 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_irrigation_valve-1.yaml @@ -0,0 +1,131 @@ +--- +uuid: e7de8633-73a6-4e7b-9d98-922c4fdf7091 +task_id: home2_ru_valve_water_valve-close_the_irrigation_valve-1 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the irrigation valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the irrigation valve + context: + id: 01KHJQW6WTQN4AE9070K20KTGJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:09.370213+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:09.370954+00:00 + - role: user + content: close the irrigation valve + attachments: null + created: 2026-02-16 08:08:09.370286+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:09.370963+00:00 + duration_ms: 23.537 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_irrigation_valve-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_irrigation_valve-2.yaml new file mode 100644 index 000000000..34620a955 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_irrigation_valve-2.yaml @@ -0,0 +1,131 @@ +--- +uuid: c8d731ee-ed40-41e7-8b99-560c13cb3921 +task_id: home2_ru_valve_water_valve-close_the_irrigation_valve-2 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the irrigation valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the irrigation valve + context: + id: 01KHJQW72B9QQY6E9EZ589ZMQP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:09.547777+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:09.548558+00:00 + - role: user + content: close the irrigation valve + attachments: null + created: 2026-02-16 08:08:09.547849+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:09.548568+00:00 + duration_ms: 35.943 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_irrigation_valve-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_irrigation_valve-3.yaml new file mode 100644 index 000000000..26a244c56 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-close_the_irrigation_valve-3.yaml @@ -0,0 +1,131 @@ +--- +uuid: b827a3b3-326d-4663-bf49-3ea1a5df1bfb +task_id: home2_ru_valve_water_valve-close_the_irrigation_valve-3 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the irrigation valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the irrigation valve + context: + id: 01KHJQW791REPG8ENYDKQ632D9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:09.761374+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:09.762144+00:00 + - role: user + content: close the irrigation valve + attachments: null + created: 2026-02-16 08:08:09.761446+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:09.762154+00:00 + duration_ms: 26.511 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_front_yard_valve-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_front_yard_valve-0.yaml new file mode 100644 index 000000000..4d1a2f3d8 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_front_yard_valve-0.yaml @@ -0,0 +1,131 @@ +--- +uuid: 4250675a-ae85-4ddb-af11-58ee705b8d53 +task_id: home2_ru_valve_water_valve-open_the_front_yard_valve-0 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the front yard valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the front yard valve + context: + id: 01KHJQW3VKE90P9VW1A2VW0WFP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:06.260045+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:06.260813+00:00 + - role: user + content: open the front yard valve + attachments: null + created: 2026-02-16 08:08:06.260143+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:06.260823+00:00 + duration_ms: 24.026 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_front_yard_valve-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_front_yard_valve-1.yaml new file mode 100644 index 000000000..c8f2a88f4 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_front_yard_valve-1.yaml @@ -0,0 +1,131 @@ +--- +uuid: cf3cf73d-e08b-43bc-b01a-f90542616d99 +task_id: home2_ru_valve_water_valve-open_the_front_yard_valve-1 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the front yard valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the front yard valve + context: + id: 01KHJQW42RQMNDV22W2AVXRDGP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:06.488978+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:06.489739+00:00 + - role: user + content: open the front yard valve + attachments: null + created: 2026-02-16 08:08:06.489045+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:06.489748+00:00 + duration_ms: 24.153 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_front_yard_valve-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_front_yard_valve-2.yaml new file mode 100644 index 000000000..1d32adf30 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_front_yard_valve-2.yaml @@ -0,0 +1,131 @@ +--- +uuid: 6bc035c0-5c17-4951-b28a-9cc0e68157b5 +task_id: home2_ru_valve_water_valve-open_the_front_yard_valve-2 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the front yard valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the front yard valve + context: + id: 01KHJQW4857P73KVH44J8DWJB2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:06.661887+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:06.662636+00:00 + - role: user + content: open the front yard valve + attachments: null + created: 2026-02-16 08:08:06.661955+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:06.662645+00:00 + duration_ms: 22.171 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_front_yard_valve-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_front_yard_valve-3.yaml new file mode 100644 index 000000000..569c8fb4a --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_front_yard_valve-3.yaml @@ -0,0 +1,131 @@ +--- +uuid: 018c93ef-37c5-4d6a-bc8c-9dc551a9c252 +task_id: home2_ru_valve_water_valve-open_the_front_yard_valve-3 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the front yard valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the front yard valve + context: + id: 01KHJQW4E36EBT1FYS6EBK914D + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:06.851444+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:06.852192+00:00 + - role: user + content: open the front yard valve + attachments: null + created: 2026-02-16 08:08:06.851514+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:06.852202+00:00 + duration_ms: 31.451 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_irrigation_valve-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_irrigation_valve-0.yaml new file mode 100644 index 000000000..2019f2d0c --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_irrigation_valve-0.yaml @@ -0,0 +1,131 @@ +--- +uuid: 51014ea7-6b74-4802-9743-929d775c2c27 +task_id: home2_ru_valve_water_valve-open_the_irrigation_valve-0 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the irrigation valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the irrigation valve + context: + id: 01KHJQW4KWQ6CWE5VRZ0FGWY79 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:07.037094+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:07.037831+00:00 + - role: user + content: open the irrigation valve + attachments: null + created: 2026-02-16 08:08:07.037167+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:07.037840+00:00 + duration_ms: 24.83 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_irrigation_valve-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_irrigation_valve-1.yaml new file mode 100644 index 000000000..d113e68b4 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_irrigation_valve-1.yaml @@ -0,0 +1,131 @@ +--- +uuid: c67f1c4c-85d3-4e07-8f17-aac29b40f08f +task_id: home2_ru_valve_water_valve-open_the_irrigation_valve-1 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the irrigation valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the irrigation valve + context: + id: 01KHJQW4S6PBR22SVWYHJ3F5ZZ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:07.206438+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:07.207274+00:00 + - role: user + content: open the irrigation valve + attachments: null + created: 2026-02-16 08:08:07.206510+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:07.207284+00:00 + duration_ms: 22.501 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_irrigation_valve-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_irrigation_valve-2.yaml new file mode 100644 index 000000000..393948b84 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_irrigation_valve-2.yaml @@ -0,0 +1,131 @@ +--- +uuid: caf24726-f162-4a6c-8828-e09b71306d01 +task_id: home2_ru_valve_water_valve-open_the_irrigation_valve-2 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the irrigation valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the irrigation valve + context: + id: 01KHJQW4Z58HVR3VWJWTZTNH7S + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:07.397968+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:07.398721+00:00 + - role: user + content: open the irrigation valve + attachments: null + created: 2026-02-16 08:08:07.398040+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:07.398730+00:00 + duration_ms: 26.286 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_irrigation_valve-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_irrigation_valve-3.yaml new file mode 100644 index 000000000..791b02c6d --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-open_the_irrigation_valve-3.yaml @@ -0,0 +1,131 @@ +--- +uuid: b11ec42e-669d-4f29-9c60-646efc6a09a7 +task_id: home2_ru_valve_water_valve-open_the_irrigation_valve-3 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the irrigation valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the irrigation valve + context: + id: 01KHJQW54K5N1HWWRV7K5R7EKJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:07.571334+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:07.572030+00:00 + - role: user + content: open the irrigation valve + attachments: null + created: 2026-02-16 08:08:07.571403+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:07.572040+00:00 + duration_ms: 20.72 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-0.yaml new file mode 100644 index 000000000..e9cf48852 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-0.yaml @@ -0,0 +1,131 @@ +--- +uuid: ea377047-a0c2-460a-8fd0-b3b6936d9d26 +task_id: home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-0 +model_id: gemma-3-27b-it +category: valve +task: + input_text: set the irrigation valve to 50% + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 50 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 50 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: set the irrigation valve to 50% + context: + id: 01KHJQW7EMTDPMJ1YFGR2T9EEN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:09.941034+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:09.941804+00:00 + - role: user + content: set the irrigation valve to 50% + attachments: null + created: 2026-02-16 08:08:09.941134+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:09.941814+00:00 + duration_ms: 29.025 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-1.yaml new file mode 100644 index 000000000..c498618c8 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-1.yaml @@ -0,0 +1,131 @@ +--- +uuid: 966bdd85-3730-425c-b479-4eae5a85d29a +task_id: home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-1 +model_id: gemma-3-27b-it +category: valve +task: + input_text: set the irrigation valve to 50% + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 50 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 50 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: set the irrigation valve to 50% + context: + id: 01KHJQW7MMBG9AKRCXBYNDYP8E + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:10.132165+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:10.132900+00:00 + - role: user + content: set the irrigation valve to 50% + attachments: null + created: 2026-02-16 08:08:10.132235+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:10.132909+00:00 + duration_ms: 27.326 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-2.yaml new file mode 100644 index 000000000..513958cf3 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-2.yaml @@ -0,0 +1,131 @@ +--- +uuid: 33162f56-ca05-4bd2-8f92-70ae178c2a6d +task_id: home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-2 +model_id: gemma-3-27b-it +category: valve +task: + input_text: set the irrigation valve to 50% + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 50 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 50 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: set the irrigation valve to 50% + context: + id: 01KHJQW7TTH6J4D9DX1TCJ1XZ3 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:10.330374+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:10.331501+00:00 + - role: user + content: set the irrigation valve to 50% + attachments: null + created: 2026-02-16 08:08:10.330469+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:10.331514+00:00 + duration_ms: 31.154 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-3.yaml new file mode 100644 index 000000000..64b7b9f15 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-3.yaml @@ -0,0 +1,131 @@ +--- +uuid: 06405bd1-0540-45bd-9b3f-faff9ca5b4a3 +task_id: home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-3 +model_id: gemma-3-27b-it +category: valve +task: + input_text: set the irrigation valve to 50% + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 50 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 50 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: set the irrigation valve to 50% + context: + id: 01KHJQW80FD3QYBFSZG8ZFPCPH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:10.511610+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + created: 2026-02-16 08:08:10.512350+00:00 + - role: user + content: set the irrigation valve to 50% + attachments: null + created: 2026-02-16 08:08:10.511681+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:10.512359+00:00 + duration_ms: 27.869 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_1_fan-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_1_fan-0.yaml new file mode 100644 index 000000000..5b897179a --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_1_fan-0.yaml @@ -0,0 +1,114 @@ +--- +uuid: 07a40e87-dd85-4bbc-90b8-b095e65c64cd +task_id: home5_cn_fan_fan-turn_off_the_bedroom_1_fan-0 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the bedroom 1 fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + percentage: 0 + state: 'off' + got: + percentage: 100 + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the bedroom 1 fan + context: + id: 01KHJQWA93E54430GH5YVMCYN7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:12.835641+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + created: 2026-02-16 08:08:12.836284+00:00 + - role: user + content: Turn off the bedroom 1 fan + attachments: null + created: 2026-02-16 08:08:12.835713+00:00 + tools: + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:12.836293+00:00 + duration_ms: 24.373 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_1_fan-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_1_fan-1.yaml new file mode 100644 index 000000000..fdb8b61c2 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_1_fan-1.yaml @@ -0,0 +1,114 @@ +--- +uuid: 94e39cc7-d38b-4e01-af38-94ea3fa8896f +task_id: home5_cn_fan_fan-turn_off_the_bedroom_1_fan-1 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the bedroom 1 fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + percentage: 0 + state: 'off' + got: + percentage: 100 + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the bedroom 1 fan + context: + id: 01KHJQWAFY37RS0818ECXZ43GC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:13.054555+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + created: 2026-02-16 08:08:13.055239+00:00 + - role: user + content: Turn off the bedroom 1 fan + attachments: null + created: 2026-02-16 08:08:13.054630+00:00 + tools: + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:13.055248+00:00 + duration_ms: 20.301 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_1_fan-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_1_fan-2.yaml new file mode 100644 index 000000000..58445406a --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_1_fan-2.yaml @@ -0,0 +1,114 @@ +--- +uuid: 995310a2-a643-44f2-ad79-9c2c06e57785 +task_id: home5_cn_fan_fan-turn_off_the_bedroom_1_fan-2 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the bedroom 1 fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + percentage: 0 + state: 'off' + got: + percentage: 100 + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the bedroom 1 fan + context: + id: 01KHJQWB47YR69YEY2FGFE0K41 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:13.703200+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + created: 2026-02-16 08:08:13.703786+00:00 + - role: user + content: Turn off the bedroom 1 fan + attachments: null + created: 2026-02-16 08:08:13.703273+00:00 + tools: + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:13.703794+00:00 + duration_ms: 28.102 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_1_fan-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_1_fan-3.yaml new file mode 100644 index 000000000..96eba3e48 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_1_fan-3.yaml @@ -0,0 +1,114 @@ +--- +uuid: e3b23faa-dd4c-442e-9d3a-c9e382921356 +task_id: home5_cn_fan_fan-turn_off_the_bedroom_1_fan-3 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the bedroom 1 fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + percentage: 0 + state: 'off' + got: + percentage: 100 + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the bedroom 1 fan + context: + id: 01KHJQWB9E22WNF8F0F1Z1T5BH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:13.870233+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + created: 2026-02-16 08:08:13.870820+00:00 + - role: user + content: Turn off the bedroom 1 fan + attachments: null + created: 2026-02-16 08:08:13.870303+00:00 + tools: + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:13.870829+00:00 + duration_ms: 21.204 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_fan-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_fan-0.yaml new file mode 100644 index 000000000..80a913fa7 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_fan-0.yaml @@ -0,0 +1,114 @@ +--- +uuid: d0d693ca-197b-4828-b086-cdfe94974462 +task_id: home5_cn_fan_fan-turn_off_the_bedroom_fan-0 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + percentage: 0 + state: 'off' + got: + percentage: 100 + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the bedroom fan + context: + id: 01KHJQW9JTDKN0ZRAXCE5CFN63 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:12.122841+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + created: 2026-02-16 08:08:12.123451+00:00 + - role: user + content: Turn off the bedroom fan + attachments: null + created: 2026-02-16 08:08:12.122909+00:00 + tools: + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:12.123460+00:00 + duration_ms: 22.56 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_fan-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_fan-1.yaml new file mode 100644 index 000000000..b54df5cad --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_fan-1.yaml @@ -0,0 +1,114 @@ +--- +uuid: 9cebeed7-162d-4305-9d6f-fd38e210d097 +task_id: home5_cn_fan_fan-turn_off_the_bedroom_fan-1 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + percentage: 0 + state: 'off' + got: + percentage: 100 + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the bedroom fan + context: + id: 01KHJQW9RC1KSCT051SB6TT9EM + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:12.300265+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + created: 2026-02-16 08:08:12.300862+00:00 + - role: user + content: Turn off the bedroom fan + attachments: null + created: 2026-02-16 08:08:12.300336+00:00 + tools: + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:12.300871+00:00 + duration_ms: 23.088 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_fan-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_fan-2.yaml new file mode 100644 index 000000000..d8fb8375e --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_fan-2.yaml @@ -0,0 +1,114 @@ +--- +uuid: 7e6c93b3-ee90-4c6a-aa79-c4f156ae7a6b +task_id: home5_cn_fan_fan-turn_off_the_bedroom_fan-2 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + percentage: 0 + state: 'off' + got: + percentage: 100 + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the bedroom fan + context: + id: 01KHJQW9XMZ840JXWDP38KGEVJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:12.468936+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + created: 2026-02-16 08:08:12.469576+00:00 + - role: user + content: Turn off the bedroom fan + attachments: null + created: 2026-02-16 08:08:12.469008+00:00 + tools: + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:12.469585+00:00 + duration_ms: 21.984 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_fan-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_fan-3.yaml new file mode 100644 index 000000000..4344b6a82 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_off_the_bedroom_fan-3.yaml @@ -0,0 +1,114 @@ +--- +uuid: 4ff74b23-4a9a-405a-ad99-0a1eabaab20b +task_id: home5_cn_fan_fan-turn_off_the_bedroom_fan-3 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + percentage: 0 + state: 'off' + got: + percentage: 100 + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the bedroom fan + context: + id: 01KHJQWA3WWH05SK977ZRJFYCD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:12.668754+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + created: 2026-02-16 08:08:12.669356+00:00 + - role: user + content: Turn off the bedroom fan + attachments: null + created: 2026-02-16 08:08:12.668825+00:00 + tools: + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:12.669364+00:00 + duration_ms: 27.818 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_1_fan-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_1_fan-0.yaml new file mode 100644 index 000000000..894cf398f --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_1_fan-0.yaml @@ -0,0 +1,114 @@ +--- +uuid: 8edab531-4c69-4bdf-8cff-03283cacafc4 +task_id: home5_cn_fan_fan-turn_on_the_bedroom_1_fan-0 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom 1 fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + percentage: 100 + state: 'on' + got: + percentage: 0 + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom 1 fan + context: + id: 01KHJQW85NR582J2ZHR37DS595 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:10.677576+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + created: 2026-02-16 08:08:10.678217+00:00 + - role: user + content: Turn on the bedroom 1 fan + attachments: null + created: 2026-02-16 08:08:10.677646+00:00 + tools: + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:10.678226+00:00 + duration_ms: 23.012 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_1_fan-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_1_fan-1.yaml new file mode 100644 index 000000000..9a9acd7d0 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_1_fan-1.yaml @@ -0,0 +1,114 @@ +--- +uuid: f06a93d7-a4d6-4f75-9d2b-bce943ac7c8e +task_id: home5_cn_fan_fan-turn_on_the_bedroom_1_fan-1 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom 1 fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + percentage: 100 + state: 'on' + got: + percentage: 0 + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom 1 fan + context: + id: 01KHJQW8CKMRY1XQ4RKTGE6BBM + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:10.899166+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + created: 2026-02-16 08:08:10.899766+00:00 + - role: user + content: Turn on the bedroom 1 fan + attachments: null + created: 2026-02-16 08:08:10.899238+00:00 + tools: + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:10.899775+00:00 + duration_ms: 30.73 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_1_fan-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_1_fan-2.yaml new file mode 100644 index 000000000..75da15d85 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_1_fan-2.yaml @@ -0,0 +1,114 @@ +--- +uuid: 6aa1d521-4f26-44b3-8572-60b7b9061cff +task_id: home5_cn_fan_fan-turn_on_the_bedroom_1_fan-2 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom 1 fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + percentage: 100 + state: 'on' + got: + percentage: 0 + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom 1 fan + context: + id: 01KHJQW8J4178NKQ53DEQ9C3B8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:11.076610+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + created: 2026-02-16 08:08:11.077275+00:00 + - role: user + content: Turn on the bedroom 1 fan + attachments: null + created: 2026-02-16 08:08:11.076682+00:00 + tools: + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:11.077284+00:00 + duration_ms: 27.516 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_1_fan-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_1_fan-3.yaml new file mode 100644 index 000000000..9eacb71ba --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_1_fan-3.yaml @@ -0,0 +1,114 @@ +--- +uuid: 41662b4c-6a9b-4b26-8680-b49a52f82a68 +task_id: home5_cn_fan_fan-turn_on_the_bedroom_1_fan-3 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom 1 fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + percentage: 100 + state: 'on' + got: + percentage: 0 + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom 1 fan + context: + id: 01KHJQW8QBTE6QD6B9YJJC3TCG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:11.243688+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + created: 2026-02-16 08:08:11.244316+00:00 + - role: user + content: Turn on the bedroom 1 fan + attachments: null + created: 2026-02-16 08:08:11.243758+00:00 + tools: + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:11.244325+00:00 + duration_ms: 21.604 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_fan-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_fan-0.yaml new file mode 100644 index 000000000..d18c2ab16 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_fan-0.yaml @@ -0,0 +1,114 @@ +--- +uuid: 58b0c631-42d3-4dcf-8473-9dda1ceb13ea +task_id: home5_cn_fan_fan-turn_on_the_bedroom_fan-0 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + percentage: 100 + state: 'on' + got: + percentage: 0 + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom fan + context: + id: 01KHJQW8W6754TEZSM7YWEF8TY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:11.398536+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + created: 2026-02-16 08:08:11.399152+00:00 + - role: user + content: Turn on the bedroom fan + attachments: null + created: 2026-02-16 08:08:11.398608+00:00 + tools: + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:11.399162+00:00 + duration_ms: 19.553 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_fan-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_fan-1.yaml new file mode 100644 index 000000000..05b9947cc --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_fan-1.yaml @@ -0,0 +1,114 @@ +--- +uuid: 17eccbfe-7d91-4c02-a44c-c1b31a417dad +task_id: home5_cn_fan_fan-turn_on_the_bedroom_fan-1 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + percentage: 100 + state: 'on' + got: + percentage: 0 + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom fan + context: + id: 01KHJQW91AMPJ0Q2V8KCB00JV4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:11.562386+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + created: 2026-02-16 08:08:11.562959+00:00 + - role: user + content: Turn on the bedroom fan + attachments: null + created: 2026-02-16 08:08:11.562455+00:00 + tools: + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:11.562967+00:00 + duration_ms: 22.515 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_fan-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_fan-2.yaml new file mode 100644 index 000000000..f70e17cfd --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_fan-2.yaml @@ -0,0 +1,114 @@ +--- +uuid: 459c2a0a-d8c5-4906-8a7e-d80b68229041 +task_id: home5_cn_fan_fan-turn_on_the_bedroom_fan-2 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + percentage: 100 + state: 'on' + got: + percentage: 0 + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom fan + context: + id: 01KHJQW96XACBXVZXVS6ZEE815 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:11.741558+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + created: 2026-02-16 08:08:11.742183+00:00 + - role: user + content: Turn on the bedroom fan + attachments: null + created: 2026-02-16 08:08:11.741628+00:00 + tools: + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:11.742192+00:00 + duration_ms: 23.753 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_fan-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_fan-3.yaml new file mode 100644 index 000000000..ba36c1a7e --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_fan_fan-turn_on_the_bedroom_fan-3.yaml @@ -0,0 +1,114 @@ +--- +uuid: 5b7456a8-0cff-4577-bd68-fd0c16787251 +task_id: home5_cn_fan_fan-turn_on_the_bedroom_fan-3 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + percentage: 100 + state: 'on' + got: + percentage: 0 + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom fan + context: + id: 01KHJQW9CZDAPGK5PHM31MAVGG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:11.935284+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + created: 2026-02-16 08:08:11.935890+00:00 + - role: user + content: Turn on the bedroom fan + attachments: null + created: 2026-02-16 08:08:11.935354+00:00 + tools: + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:11.935899+00:00 + duration_ms: 26.391 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_light_light-set_the_living_room_light_to_50_brightness-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_light_light-set_the_living_room_light_to_50_brightness-0.yaml new file mode 100644 index 000000000..7776ba91c --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_light_light-set_the_living_room_light_to_50_brightness-0.yaml @@ -0,0 +1,128 @@ +--- +uuid: c781a294-da97-46a4-9932-32821a114d60 +task_id: home5_cn_light_light-set_the_living_room_light_to_50_brightness-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Set the living room light to 50% brightness + expect_changes: + light.living_room_light: + state: 'on' + attributes: + brightness: 128 + color_mode: brightness +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + brightness: 128 + got: + brightness: 255 + conversation_trace: + - event_type: async_process + data: + text: Set the living room light to 50% brightness + context: + id: 01KHJQWC70MDHRJV4ZMJS0KJNH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:14.816750+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Porch Light + domain: light + areas: Front Porch + created: 2026-02-16 08:08:14.817442+00:00 + - role: user + content: Set the living room light to 50% brightness + attachments: null + created: 2026-02-16 08:08:14.816818+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:14.817452+00:00 + duration_ms: 24.696 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_light_light-set_the_living_room_light_to_50_brightness-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_light_light-set_the_living_room_light_to_50_brightness-1.yaml new file mode 100644 index 000000000..03eb33a2e --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_light_light-set_the_living_room_light_to_50_brightness-1.yaml @@ -0,0 +1,128 @@ +--- +uuid: eab8eb47-f1df-4964-b623-b00c1c64d7ab +task_id: home5_cn_light_light-set_the_living_room_light_to_50_brightness-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Set the living room light to 50% brightness + expect_changes: + light.living_room_light: + state: 'on' + attributes: + brightness: 128 + color_mode: brightness +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + brightness: 128 + got: + brightness: 255 + conversation_trace: + - event_type: async_process + data: + text: Set the living room light to 50% brightness + context: + id: 01KHJQWCD6Q8J1RSJGJKFYF2YS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:15.014400+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Porch Light + domain: light + areas: Front Porch + created: 2026-02-16 08:08:15.015101+00:00 + - role: user + content: Set the living room light to 50% brightness + attachments: null + created: 2026-02-16 08:08:15.014470+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:15.015110+00:00 + duration_ms: 30.155 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_light_light-set_the_living_room_light_to_50_brightness-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_light_light-set_the_living_room_light_to_50_brightness-2.yaml new file mode 100644 index 000000000..ef8e08f6d --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_light_light-set_the_living_room_light_to_50_brightness-2.yaml @@ -0,0 +1,128 @@ +--- +uuid: a8b09d91-1314-414d-b9eb-8cf1a443d9af +task_id: home5_cn_light_light-set_the_living_room_light_to_50_brightness-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Set the living room light to 50% brightness + expect_changes: + light.living_room_light: + state: 'on' + attributes: + brightness: 128 + color_mode: brightness +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + brightness: 128 + got: + brightness: 255 + conversation_trace: + - event_type: async_process + data: + text: Set the living room light to 50% brightness + context: + id: 01KHJQWCK0W1GXVX3ZR0CBY7AV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:15.200486+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Porch Light + domain: light + areas: Front Porch + created: 2026-02-16 08:08:15.201198+00:00 + - role: user + content: Set the living room light to 50% brightness + attachments: null + created: 2026-02-16 08:08:15.200555+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:15.201207+00:00 + duration_ms: 22.296 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_light_light-set_the_living_room_light_to_50_brightness-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_light_light-set_the_living_room_light_to_50_brightness-3.yaml new file mode 100644 index 000000000..867333c6e --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home5_cn_light_light-set_the_living_room_light_to_50_brightness-3.yaml @@ -0,0 +1,128 @@ +--- +uuid: 583e51d5-6e4a-4397-9b63-e1cefcf542df +task_id: home5_cn_light_light-set_the_living_room_light_to_50_brightness-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Set the living room light to 50% brightness + expect_changes: + light.living_room_light: + state: 'on' + attributes: + brightness: 128 + color_mode: brightness +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + brightness: 128 + got: + brightness: 255 + conversation_trace: + - event_type: async_process + data: + text: Set the living room light to 50% brightness + context: + id: 01KHJQWCRHA9HPSV3Y8FXH4MV7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:15.378003+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Porch Light + domain: light + areas: Front Porch + created: 2026-02-16 08:08:15.378724+00:00 + - role: user + content: Set the living room light to 50% brightness + attachments: null + created: 2026-02-16 08:08:15.378072+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:15.378733+00:00 + duration_ms: 21.567 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-0.yaml new file mode 100644 index 000000000..556aeb715 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-0.yaml @@ -0,0 +1,125 @@ +--- +uuid: 9fc64cb5-3a4b-432f-a0a4-63def85a359a +task_id: home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the living room curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the living room curtains + context: + id: 01KHJQWJB63CG07BDZXDNRV9WN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:21.094821+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:21.095565+00:00 + - role: user + content: Close the living room curtains + attachments: null + created: 2026-02-16 08:08:21.094890+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:21.095574+00:00 + duration_ms: 91.984 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-1.yaml new file mode 100644 index 000000000..c5eea96d9 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-1.yaml @@ -0,0 +1,125 @@ +--- +uuid: 54fb85b5-484d-44b4-bbee-354cc84de6a7 +task_id: home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the living room curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the living room curtains + context: + id: 01KHJQWJJSF131VNBVHTZDD421 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:21.338051+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:21.338792+00:00 + - role: user + content: Close the living room curtains + attachments: null + created: 2026-02-16 08:08:21.338151+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:21.338801+00:00 + duration_ms: 95.676 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-2.yaml new file mode 100644 index 000000000..4cc20b292 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-2.yaml @@ -0,0 +1,125 @@ +--- +uuid: e00b0c3d-9df5-4eaf-9895-a2c3ace40933 +task_id: home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the living room curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the living room curtains + context: + id: 01KHJQWJT1MPYQKCSTZJJ6JNMS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:21.569938+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:21.570695+00:00 + - role: user + content: Close the living room curtains + attachments: null + created: 2026-02-16 08:08:21.570007+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:21.570703+00:00 + duration_ms: 90.555 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-3.yaml new file mode 100644 index 000000000..433f00ac7 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-3.yaml @@ -0,0 +1,125 @@ +--- +uuid: bd3cd146-4e9f-4b59-b4b9-4331c3c97a69 +task_id: home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the living room curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the living room curtains + context: + id: 01KHJQWKC9VVCFT2BDYJTYQP8H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:22.154063+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:22.154801+00:00 + - role: user + content: Close the living room curtains + attachments: null + created: 2026-02-16 08:08:22.154160+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:22.154809+00:00 + duration_ms: 102.349 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-0.yaml new file mode 100644 index 000000000..57d098a80 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-0.yaml @@ -0,0 +1,125 @@ +--- +uuid: a327160b-3d18-4131-bf7f-98632d54dd74 +task_id: home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the living room smart curtain + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the living room smart curtain + context: + id: 01KHJQWH2MS0C42CDE1D1GSHJP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:19.796134+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:19.796834+00:00 + - role: user + content: Close the living room smart curtain + attachments: null + created: 2026-02-16 08:08:19.796216+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:19.796843+00:00 + duration_ms: 24.369 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-1.yaml new file mode 100644 index 000000000..fcde54ac3 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-1.yaml @@ -0,0 +1,125 @@ +--- +uuid: 2f892244-21dc-498b-a1e9-0869fa7b5e0f +task_id: home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the living room smart curtain + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the living room smart curtain + context: + id: 01KHJQWH8EKDQY0P4Y5PRQ8ZHC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:19.982690+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:19.983438+00:00 + - role: user + content: Close the living room smart curtain + attachments: null + created: 2026-02-16 08:08:19.982759+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:19.983446+00:00 + duration_ms: 88.076 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-2.yaml new file mode 100644 index 000000000..442d33e76 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-2.yaml @@ -0,0 +1,125 @@ +--- +uuid: 71fcfaad-25c7-4cef-ac61-9e41db6f84e3 +task_id: home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the living room smart curtain + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the living room smart curtain + context: + id: 01KHJQWHGEDT28NC6GZ4HGJ7XC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:20.238354+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:20.239046+00:00 + - role: user + content: Close the living room smart curtain + attachments: null + created: 2026-02-16 08:08:20.238424+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:20.239054+00:00 + duration_ms: 24.191 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-3.yaml new file mode 100644 index 000000000..dc9aae165 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-3.yaml @@ -0,0 +1,125 @@ +--- +uuid: 1d4910db-dc2d-48bd-bf29-0ce75fc3909e +task_id: home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the living room smart curtain + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the living room smart curtain + context: + id: 01KHJQWJ5GR827C56WB4N1A0GJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:20.912574+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:20.913280+00:00 + - role: user + content: Close the living room smart curtain + attachments: null + created: 2026-02-16 08:08:20.912642+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:20.913290+00:00 + duration_ms: 25.914 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-0.yaml new file mode 100644 index 000000000..0cce799eb --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-0.yaml @@ -0,0 +1,125 @@ +--- +uuid: f5d0385c-25c5-4048-a2f8-a1153c2d2acd +task_id: home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the smart curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the smart curtains + context: + id: 01KHJQWG477H4DMXH13Y2NBVHT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:18.823266+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:18.823933+00:00 + - role: user + content: Close the smart curtains + attachments: null + created: 2026-02-16 08:08:18.823333+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:18.823942+00:00 + duration_ms: 83.015 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-1.yaml new file mode 100644 index 000000000..dfa7aa955 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-1.yaml @@ -0,0 +1,125 @@ +--- +uuid: b4f0b4be-7e2d-428b-a354-53a7d3b2a5d7 +task_id: home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the smart curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the smart curtains + context: + id: 01KHJQWGB28WNF9819QEAXSFTR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:19.042827+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:19.043543+00:00 + - role: user + content: Close the smart curtains + attachments: null + created: 2026-02-16 08:08:19.042898+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:19.043551+00:00 + duration_ms: 135.794 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-2.yaml new file mode 100644 index 000000000..772f1b59e --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-2.yaml @@ -0,0 +1,125 @@ +--- +uuid: e2257ed7-ea76-4d75-9757-21f85bfd2d54 +task_id: home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the smart curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the smart curtains + context: + id: 01KHJQWGM3P9S6Q8664JZVRXTB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:19.331967+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:19.332702+00:00 + - role: user + content: Close the smart curtains + attachments: null + created: 2026-02-16 08:08:19.332040+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:19.332711+00:00 + duration_ms: 88.623 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-3.yaml new file mode 100644 index 000000000..da5ddca17 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-3.yaml @@ -0,0 +1,125 @@ +--- +uuid: 72737333-ca5b-4f14-b4cb-e91533ac9a19 +task_id: home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the smart curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the smart curtains + context: + id: 01KHJQWGVB8EB4347WBJXR6CH9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:19.563654+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:19.564445+00:00 + - role: user + content: Close the smart curtains + attachments: null + created: 2026-02-16 08:08:19.563727+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:19.564454+00:00 + duration_ms: 85.285 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-0.yaml new file mode 100644 index 000000000..a9f8bf284 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-0.yaml @@ -0,0 +1,125 @@ +--- +uuid: 8667bd20-da82-4d8e-887f-3fd8ba047fa6 +task_id: home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room curtains + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room curtains + context: + id: 01KHJQWF5BZC66HF0R7CG8JB0D + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:17.836065+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:17.836801+00:00 + - role: user + content: Open the living room curtains + attachments: null + created: 2026-02-16 08:08:17.836174+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:17.836810+00:00 + duration_ms: 92.646 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-1.yaml new file mode 100644 index 000000000..951b75667 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-1.yaml @@ -0,0 +1,125 @@ +--- +uuid: ae731cd6-7289-4a75-9ba0-74cc125bff9a +task_id: home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room curtains + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room curtains + context: + id: 01KHJQWFCMEM8Y1AX5N0392X53 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:18.069023+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:18.069755+00:00 + - role: user + content: Open the living room curtains + attachments: null + created: 2026-02-16 08:08:18.069134+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:18.069764+00:00 + duration_ms: 33.283 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-2.yaml new file mode 100644 index 000000000..6c5051ab5 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-2.yaml @@ -0,0 +1,125 @@ +--- +uuid: 435b5fb6-c450-4661-9746-27cff92c95c8 +task_id: home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room curtains + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room curtains + context: + id: 01KHJQWFJRYRTPRVXBP54W51C0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:18.264930+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:18.265617+00:00 + - role: user + content: Open the living room curtains + attachments: null + created: 2026-02-16 08:08:18.264997+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:18.265626+00:00 + duration_ms: 94.677 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-3.yaml new file mode 100644 index 000000000..4e9ba8325 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-3.yaml @@ -0,0 +1,125 @@ +--- +uuid: 264c208b-f7e2-4b78-9e6b-342bd6ba06b1 +task_id: home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room curtains + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room curtains + context: + id: 01KHJQWFTRHZAHT97H9MQS0DQN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:18.520138+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:18.520838+00:00 + - role: user + content: Open the living room curtains + attachments: null + created: 2026-02-16 08:08:18.520209+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:18.520847+00:00 + duration_ms: 103.887 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-0.yaml new file mode 100644 index 000000000..3908c8b18 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-0.yaml @@ -0,0 +1,125 @@ +--- +uuid: cc5244ec-d1ec-405d-ba00-f22f75c46f7d +task_id: home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room smart curtain + context: + id: 01KHJQWEAC54HBADZBGCN6N3DF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:16.972884+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:16.973601+00:00 + - role: user + content: Open the living room smart curtain + attachments: null + created: 2026-02-16 08:08:16.972951+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:16.973610+00:00 + duration_ms: 91.879 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-1.yaml new file mode 100644 index 000000000..cc7b20f34 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-1.yaml @@ -0,0 +1,125 @@ +--- +uuid: c19776cc-6052-4a17-982c-d4bd1339b374 +task_id: home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room smart curtain + context: + id: 01KHJQWEHPMAQ827G9XQ0D7QYX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:17.206484+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:17.207146+00:00 + - role: user + content: Open the living room smart curtain + attachments: null + created: 2026-02-16 08:08:17.206549+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:17.207154+00:00 + duration_ms: 29.765 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-2.yaml new file mode 100644 index 000000000..611c0b0c1 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-2.yaml @@ -0,0 +1,125 @@ +--- +uuid: 9ff0207b-d0b2-418d-a726-bcdddbab1df9 +task_id: home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room smart curtain + context: + id: 01KHJQWEQ9AEMZQV6XK3T7EEV9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:17.385275+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:17.385984+00:00 + - role: user + content: Open the living room smart curtain + attachments: null + created: 2026-02-16 08:08:17.385355+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:17.385993+00:00 + duration_ms: 23.308 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-3.yaml new file mode 100644 index 000000000..da7ead578 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-3.yaml @@ -0,0 +1,125 @@ +--- +uuid: 2777e67e-42cd-40ca-8c3e-fc1985b49f96 +task_id: home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room smart curtain + context: + id: 01KHJQWEWGPTBJ9GVTJ7628X69 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:17.552980+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:17.553672+00:00 + - role: user + content: Open the living room smart curtain + attachments: null + created: 2026-02-16 08:08:17.553048+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:17.553681+00:00 + duration_ms: 116.453 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-0.yaml new file mode 100644 index 000000000..88a8707fe --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-0.yaml @@ -0,0 +1,125 @@ +--- +uuid: 682c2ddd-f320-4e6c-a7d4-ac185cb95d84 +task_id: home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the smart curtain + context: + id: 01KHJQWCXX109ZN2RAJ889T5WD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:15.549575+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:15.550299+00:00 + - role: user + content: Open the smart curtain + attachments: null + created: 2026-02-16 08:08:15.549644+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:15.550308+00:00 + duration_ms: 59.86 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-1.yaml new file mode 100644 index 000000000..fa3351366 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-1.yaml @@ -0,0 +1,125 @@ +--- +uuid: 70390acc-5b4b-4624-93c1-a6f849fcf59c +task_id: home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the smart curtain + context: + id: 01KHJQWDQVY8ANBNZ0KSGK2ZPB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:16.379469+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:16.380163+00:00 + - role: user + content: Open the smart curtain + attachments: null + created: 2026-02-16 08:08:16.379536+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:16.380172+00:00 + duration_ms: 24.095 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-2.yaml new file mode 100644 index 000000000..9e706e3c1 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-2.yaml @@ -0,0 +1,125 @@ +--- +uuid: 98533405-2179-4a72-be0a-5484ad97757f +task_id: home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the smart curtain + context: + id: 01KHJQWDWXYV2PAR0EX56NXXXJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:16.541583+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:16.542296+00:00 + - role: user + content: Open the smart curtain + attachments: null + created: 2026-02-16 08:08:16.541650+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:16.542305+00:00 + duration_ms: 25.818 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-3.yaml new file mode 100644 index 000000000..3c95012a1 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-3.yaml @@ -0,0 +1,125 @@ +--- +uuid: 825d1edf-80fe-4fce-8f58-ab97ba93277c +task_id: home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the smart curtain + context: + id: 01KHJQWE2EWPHK7354P149ZBHC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:16.718473+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:16.719138+00:00 + - role: user + content: Open the smart curtain + attachments: null + created: 2026-02-16 08:08:16.718539+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:16.719147+00:00 + duration_ms: 90.585 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-0.yaml new file mode 100644 index 000000000..0a757763d --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-0.yaml @@ -0,0 +1,126 @@ +--- +uuid: 84fdb947-c766-4acb-9b25-6454af880846 +task_id: home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the living room curtains to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 50 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the living room curtains to 50% + context: + id: 01KHJQWMHSVJ9GX0TTBH1E1GZZ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:23.353646+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:23.354333+00:00 + - role: user + content: Set the living room curtains to 50% + attachments: null + created: 2026-02-16 08:08:23.353714+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:23.354342+00:00 + duration_ms: 24.073 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-1.yaml new file mode 100644 index 000000000..08108d36d --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-1.yaml @@ -0,0 +1,126 @@ +--- +uuid: 60827f5e-25dd-4c3e-9fc8-95cd619b243b +task_id: home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the living room curtains to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 50 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the living room curtains to 50% + context: + id: 01KHJQWMPSQ8RZWYZMWECNNPS8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:23.513571+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:23.514274+00:00 + - role: user + content: Set the living room curtains to 50% + attachments: null + created: 2026-02-16 08:08:23.513639+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:23.514283+00:00 + duration_ms: 80.252 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-2.yaml new file mode 100644 index 000000000..01df6c103 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-2.yaml @@ -0,0 +1,126 @@ +--- +uuid: 9e98bbf8-a7db-423b-8ed3-cef3d603aafc +task_id: home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the living room curtains to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 50 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the living room curtains to 50% + context: + id: 01KHJQWMXM8WZCAXYMR76NVDWJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:23.732154+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:23.732858+00:00 + - role: user + content: Set the living room curtains to 50% + attachments: null + created: 2026-02-16 08:08:23.732224+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:23.732867+00:00 + duration_ms: 31.772 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-3.yaml new file mode 100644 index 000000000..7d1f092b8 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-3.yaml @@ -0,0 +1,126 @@ +--- +uuid: b9e93e90-07da-403f-8383-4e254a8db4b3 +task_id: home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the living room curtains to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 50 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the living room curtains to 50% + context: + id: 01KHJQWN38GS1ERW1HQ7VN0HRK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:23.912717+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:23.913423+00:00 + - role: user + content: Set the living room curtains to 50% + attachments: null + created: 2026-02-16 08:08:23.912783+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:23.913433+00:00 + duration_ms: 89.505 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-0.yaml new file mode 100644 index 000000000..06cc628dd --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-0.yaml @@ -0,0 +1,126 @@ +--- +uuid: facbb715-28bf-4a22-9b0f-9ca9c7fcb822 +task_id: home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the smart curtain to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 50 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the smart curtain to 50% + context: + id: 01KHJQWKR2EWJY51FS8M0692JS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:22.530715+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:22.531488+00:00 + - role: user + content: Set the smart curtain to 50% + attachments: null + created: 2026-02-16 08:08:22.530785+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:22.531497+00:00 + duration_ms: 90.231 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-1.yaml new file mode 100644 index 000000000..0f8cef44c --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-1.yaml @@ -0,0 +1,126 @@ +--- +uuid: 030c34f2-e5c4-46ca-8434-9f4b9912b764 +task_id: home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the smart curtain to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 50 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the smart curtain to 50% + context: + id: 01KHJQWKZ8P99GHWW3TR9FS0KJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:22.761013+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:22.761717+00:00 + - role: user + content: Set the smart curtain to 50% + attachments: null + created: 2026-02-16 08:08:22.761111+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:22.761726+00:00 + duration_ms: 24.925 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-2.yaml new file mode 100644 index 000000000..4006556ef --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-2.yaml @@ -0,0 +1,126 @@ +--- +uuid: 099d47e8-2918-4d87-9fca-24c7bdfde189 +task_id: home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the smart curtain to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 50 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the smart curtain to 50% + context: + id: 01KHJQWM4CW2TSXRXDNH02F7Q8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:22.925040+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:22.925757+00:00 + - role: user + content: Set the smart curtain to 50% + attachments: null + created: 2026-02-16 08:08:22.925138+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:22.925767+00:00 + duration_ms: 92.236 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-3.yaml new file mode 100644 index 000000000..329873bb8 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-3.yaml @@ -0,0 +1,126 @@ +--- +uuid: c8a5c8c9-233c-45fa-a949-dc25073ddc32 +task_id: home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the smart curtain to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 50 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the smart curtain to 50% + context: + id: 01KHJQWMC81G565N64VWJ38P88 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:23.176762+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:23.177485+00:00 + - role: user + content: Set the smart curtain to 50% + attachments: null + created: 2026-02-16 08:08:23.176829+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:23.177494+00:00 + duration_ms: 31.074 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-mute_the_outdoor_speakers-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-mute_the_outdoor_speakers-0.yaml new file mode 100644 index 000000000..3959e22ab --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-mute_the_outdoor_speakers-0.yaml @@ -0,0 +1,168 @@ +--- +uuid: 92d271bb-8776-4bad-a01b-13a2f45e1afa +task_id: home7_dk_media_player_media_player-mute_the_outdoor_speakers-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Mute the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.0 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.0 + got: + volume_level: 0.6 + conversation_trace: + - event_type: async_process + data: + text: Mute the outdoor speakers + context: + id: 01KHJQWWW6FCMN51SYR8FFVFJ9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:31.878166+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:31.879551+00:00 + - role: user + content: Mute the outdoor speakers + attachments: null + created: 2026-02-16 08:08:31.878235+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebf1754e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:31.879561+00:00 + duration_ms: 26.814 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-mute_the_outdoor_speakers-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-mute_the_outdoor_speakers-1.yaml new file mode 100644 index 000000000..dc979bcb8 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-mute_the_outdoor_speakers-1.yaml @@ -0,0 +1,168 @@ +--- +uuid: 5dff96bc-91fa-4ea3-a588-94f7c81bca89 +task_id: home7_dk_media_player_media_player-mute_the_outdoor_speakers-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Mute the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.0 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.0 + got: + volume_level: 0.6 + conversation_trace: + - event_type: async_process + data: + text: Mute the outdoor speakers + context: + id: 01KHJQWX1N5MJ2WG397FDAKTG4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:32.053752+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:32.054879+00:00 + - role: user + content: Mute the outdoor speakers + attachments: null + created: 2026-02-16 08:08:32.053823+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebdb473d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:32.054888+00:00 + duration_ms: 81.634 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-mute_the_outdoor_speakers-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-mute_the_outdoor_speakers-2.yaml new file mode 100644 index 000000000..1dc036db7 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-mute_the_outdoor_speakers-2.yaml @@ -0,0 +1,168 @@ +--- +uuid: a47c853b-6c4b-4b53-b242-4ac8d53d7785 +task_id: home7_dk_media_player_media_player-mute_the_outdoor_speakers-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Mute the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.0 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.0 + got: + volume_level: 0.6 + conversation_trace: + - event_type: async_process + data: + text: Mute the outdoor speakers + context: + id: 01KHJQWXJ7BKH9CY94GY4QQ8F9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:32.583191+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:32.584246+00:00 + - role: user + content: Mute the outdoor speakers + attachments: null + created: 2026-02-16 08:08:32.583262+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebd7212d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:32.584255+00:00 + duration_ms: 28.778 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-mute_the_outdoor_speakers-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-mute_the_outdoor_speakers-3.yaml new file mode 100644 index 000000000..8c56531ec --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-mute_the_outdoor_speakers-3.yaml @@ -0,0 +1,168 @@ +--- +uuid: d605f216-e4fd-4a82-aebe-4993f3578d74 +task_id: home7_dk_media_player_media_player-mute_the_outdoor_speakers-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Mute the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.0 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.0 + got: + volume_level: 0.6 + conversation_trace: + - event_type: async_process + data: + text: Mute the outdoor speakers + context: + id: 01KHJQWXR0CD0W9A8Y6MXSTJ8W + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:32.768499+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:32.773915+00:00 + - role: user + content: Mute the outdoor speakers + attachments: null + created: 2026-02-16 08:08:32.768567+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebf0be6c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:32.773926+00:00 + duration_ms: 89.453 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_outdoor_speakers-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_outdoor_speakers-0.yaml new file mode 100644 index 000000000..01ed1f047 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_outdoor_speakers-0.yaml @@ -0,0 +1,167 @@ +--- +uuid: 1d2ca239-d151-48be-b63d-ba9d409f8436 +task_id: home7_dk_media_player_media_player-pause_outdoor_speakers-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause Outdoor Speakers + context: + id: 01KHJQWNB4EV3X2TY46A8GJB4N + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:24.164798+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:24.166365+00:00 + - role: user + content: Pause Outdoor Speakers + attachments: null + created: 2026-02-16 08:08:24.164865+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ecc418300>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:24.166374+00:00 + duration_ms: 32.327 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_outdoor_speakers-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_outdoor_speakers-1.yaml new file mode 100644 index 000000000..31f86becf --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_outdoor_speakers-1.yaml @@ -0,0 +1,167 @@ +--- +uuid: 52105abf-7fe2-46a9-8896-fe7510752e71 +task_id: home7_dk_media_player_media_player-pause_outdoor_speakers-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause Outdoor Speakers + context: + id: 01KHJQWNGK5XPAPZX335YYFAPC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:24.339624+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:24.345365+00:00 + - role: user + content: Pause Outdoor Speakers + attachments: null + created: 2026-02-16 08:08:24.339692+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5eccf492d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:24.345376+00:00 + duration_ms: 100.928 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_outdoor_speakers-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_outdoor_speakers-2.yaml new file mode 100644 index 000000000..fd5e8623b --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_outdoor_speakers-2.yaml @@ -0,0 +1,167 @@ +--- +uuid: 08fac00c-1b11-4a54-9367-eaadad1d1f21 +task_id: home7_dk_media_player_media_player-pause_outdoor_speakers-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause Outdoor Speakers + context: + id: 01KHJQWNR9YD54YD1N8GFEE9S4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:24.585407+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:24.587703+00:00 + - role: user + content: Pause Outdoor Speakers + attachments: null + created: 2026-02-16 08:08:24.585476+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ecc2e0b40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:24.587713+00:00 + duration_ms: 25.473 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_outdoor_speakers-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_outdoor_speakers-3.yaml new file mode 100644 index 000000000..31957f9da --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_outdoor_speakers-3.yaml @@ -0,0 +1,167 @@ +--- +uuid: cd9a9d0c-43b8-4a58-a07a-875150a8f7ff +task_id: home7_dk_media_player_media_player-pause_outdoor_speakers-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause Outdoor Speakers + context: + id: 01KHJQWNZX8KJT3H819AMA8KNP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:24.830115+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:24.831264+00:00 + - role: user + content: Pause Outdoor Speakers + attachments: null + created: 2026-02-16 08:08:24.830195+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebff95590>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:24.831273+00:00 + duration_ms: 93.778 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-0.yaml new file mode 100644 index 000000000..0e64ccc2b --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-0.yaml @@ -0,0 +1,167 @@ +--- +uuid: deb4e548-ce62-4ce8-807e-17abaa31d196 +task_id: home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the Rooftop Terrace music + context: + id: 01KHJQWPN95T6E7FPYNRCE8CVP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:25.513514+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:25.514625+00:00 + - role: user + content: Pause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:08:25.513584+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebf1fcbf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:25.514634+00:00 + duration_ms: 123.407 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-1.yaml new file mode 100644 index 000000000..386840a24 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-1.yaml @@ -0,0 +1,167 @@ +--- +uuid: bfaa3fa2-4c2f-4f5d-a98a-7051df6077aa +task_id: home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the Rooftop Terrace music + context: + id: 01KHJQWPY1MM3S4T49QW5QJZE7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:25.793505+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:25.794618+00:00 + - role: user + content: Pause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:08:25.793577+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebf0901a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:25.794627+00:00 + duration_ms: 25.669 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-2.yaml new file mode 100644 index 000000000..4def8e009 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-2.yaml @@ -0,0 +1,167 @@ +--- +uuid: f812aceb-8550-423c-9451-82ab0dbaa6a9 +task_id: home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the Rooftop Terrace music + context: + id: 01KHJQWQ36J39R53363X5TXX6W + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:25.959042+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:25.960652+00:00 + - role: user + content: Pause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:08:25.959145+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebfa72350>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:25.960663+00:00 + duration_ms: 94.654 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-3.yaml new file mode 100644 index 000000000..878ed5062 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-3.yaml @@ -0,0 +1,167 @@ +--- +uuid: 3689ffed-7ab6-4f20-bc66-c22ac189f093 +task_id: home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the Rooftop Terrace music + context: + id: 01KHJQWQAPSQBSPG9WQT9JNKZZ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:26.198727+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:26.200364+00:00 + - role: user + content: Pause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:08:26.198799+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebfb6d2d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:26.200375+00:00 + duration_ms: 28.37 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-resume_outdoor_speakers-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-resume_outdoor_speakers-0.yaml new file mode 100644 index 000000000..85eb58b8b --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-resume_outdoor_speakers-0.yaml @@ -0,0 +1,167 @@ +--- +uuid: fcba6ced-6c92-413e-a65d-30788c2d88a4 +task_id: home7_dk_media_player_media_player-resume_outdoor_speakers-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Resume Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Resume Outdoor Speakers + context: + id: 01KHJQWQGFRKHYPX20WK4NM0FC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:26.383361+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:26.384882+00:00 + - role: user + content: Resume Outdoor Speakers + attachments: null + created: 2026-02-16 08:08:26.383434+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ecc7c4040>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:26.384892+00:00 + duration_ms: 24.49 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-resume_outdoor_speakers-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-resume_outdoor_speakers-1.yaml new file mode 100644 index 000000000..7b002ee8e --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-resume_outdoor_speakers-1.yaml @@ -0,0 +1,167 @@ +--- +uuid: 03938bd6-ddaa-4bca-a956-8f357df3eba9 +task_id: home7_dk_media_player_media_player-resume_outdoor_speakers-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Resume Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Resume Outdoor Speakers + context: + id: 01KHJQWQPBZNP8CGMBPW72GWQK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:26.571900+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:26.573049+00:00 + - role: user + content: Resume Outdoor Speakers + attachments: null + created: 2026-02-16 08:08:26.571971+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ecc7f6da0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:26.573058+00:00 + duration_ms: 26.546 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-resume_outdoor_speakers-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-resume_outdoor_speakers-2.yaml new file mode 100644 index 000000000..e0d5879c3 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-resume_outdoor_speakers-2.yaml @@ -0,0 +1,167 @@ +--- +uuid: 3dcea39b-7fb0-4b6b-ab10-66c0ed119ff7 +task_id: home7_dk_media_player_media_player-resume_outdoor_speakers-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Resume Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Resume Outdoor Speakers + context: + id: 01KHJQWQW22JT9008BE7XX8M6C + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:26.754782+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:26.755943+00:00 + - role: user + content: Resume Outdoor Speakers + attachments: null + created: 2026-02-16 08:08:26.754851+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebf7a9220>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:26.755953+00:00 + duration_ms: 35.444 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-resume_outdoor_speakers-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-resume_outdoor_speakers-3.yaml new file mode 100644 index 000000000..72e9d37a4 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-resume_outdoor_speakers-3.yaml @@ -0,0 +1,167 @@ +--- +uuid: 44eea2dd-294f-41fc-a667-9f77ea20f544 +task_id: home7_dk_media_player_media_player-resume_outdoor_speakers-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Resume Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Resume Outdoor Speakers + context: + id: 01KHJQWR2A24NRHB1REX1WYM37 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:26.954242+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:26.955836+00:00 + - role: user + content: Resume Outdoor Speakers + attachments: null + created: 2026-02-16 08:08:26.954315+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebdb9bd70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:26.955846+00:00 + duration_ms: 26.858 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-0.yaml new file mode 100644 index 000000000..ab69d488a --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-0.yaml @@ -0,0 +1,168 @@ +--- +uuid: 9c3b926f-69d6-4c88-8725-9818d13fcba3 +task_id: home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Set outdoor speakers volume to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Set outdoor speakers volume to 50% + context: + id: 01KHJQWV7WZC272WJCSTE6ZRVR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:30.204236+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:30.209933+00:00 + - role: user + content: Set outdoor speakers volume to 50% + attachments: null + created: 2026-02-16 08:08:30.204307+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebcd59fe0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:30.209945+00:00 + duration_ms: 30.046 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-1.yaml new file mode 100644 index 000000000..f1a71e65e --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-1.yaml @@ -0,0 +1,168 @@ +--- +uuid: 63e94fde-69e1-4722-aa68-55e2d688c305 +task_id: home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Set outdoor speakers volume to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Set outdoor speakers volume to 50% + context: + id: 01KHJQWVDHR8MEKH796F1R78P8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:30.385519+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:30.386701+00:00 + - role: user + content: Set outdoor speakers volume to 50% + attachments: null + created: 2026-02-16 08:08:30.385588+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ecc3a57a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:30.386711+00:00 + duration_ms: 33.124 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-2.yaml new file mode 100644 index 000000000..b912df757 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-2.yaml @@ -0,0 +1,168 @@ +--- +uuid: bbd0f360-967c-4d5c-957d-93ce8d9215ce +task_id: home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Set outdoor speakers volume to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Set outdoor speakers volume to 50% + context: + id: 01KHJQWVK5SCCWE3DVSW19YD4P + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:30.565544+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:30.567893+00:00 + - role: user + content: Set outdoor speakers volume to 50% + attachments: null + created: 2026-02-16 08:08:30.565616+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ecc577ed0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:30.567904+00:00 + duration_ms: 33.014 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-3.yaml new file mode 100644 index 000000000..47d84355e --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-3.yaml @@ -0,0 +1,168 @@ +--- +uuid: cd6ff565-7ec8-44fd-8ec2-0b5b32a87049 +task_id: home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Set outdoor speakers volume to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Set outdoor speakers volume to 50% + context: + id: 01KHJQWVVG033PDA2Z4CSYMRQY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:30.832621+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:30.833783+00:00 + - role: user + content: Set outdoor speakers volume to 50% + attachments: null + created: 2026-02-16 08:08:30.832691+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ecc259640>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:30.833793+00:00 + duration_ms: 35.995 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-0.yaml new file mode 100644 index 000000000..72de493a6 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-0.yaml @@ -0,0 +1,168 @@ +--- +uuid: 5e943956-0641-42d6-a42c-40dc674f0885 +task_id: home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip song on outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip song on outdoor speakers + context: + id: 01KHJQWS9AKP3C9Z4333ZZ60K9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:28.202288+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:28.203410+00:00 + - role: user + content: Skip song on outdoor speakers + attachments: null + created: 2026-02-16 08:08:28.202357+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebd9fbd70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:28.203420+00:00 + duration_ms: 81.078 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-1.yaml new file mode 100644 index 000000000..867c576ca --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-1.yaml @@ -0,0 +1,168 @@ +--- +uuid: 0b8fb7c2-56e3-4b17-83d4-37ca8c042b30 +task_id: home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip song on outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip song on outdoor speakers + context: + id: 01KHJQWSGBYRWC9E1QVDY6C5XP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:28.427736+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:28.433300+00:00 + - role: user + content: Skip song on outdoor speakers + attachments: null + created: 2026-02-16 08:08:28.427805+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebd8f75e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:28.433312+00:00 + duration_ms: 29.582 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-2.yaml new file mode 100644 index 000000000..893fea3b2 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-2.yaml @@ -0,0 +1,168 @@ +--- +uuid: 7527949a-1422-411d-b371-750abf654c4a +task_id: home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip song on outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip song on outdoor speakers + context: + id: 01KHJQWSNPRMW2WE6Z3MFV843F + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:28.598886+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:28.603185+00:00 + - role: user + content: Skip song on outdoor speakers + attachments: null + created: 2026-02-16 08:08:28.598956+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5eccc6d220>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:28.603197+00:00 + duration_ms: 28.641 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-3.yaml new file mode 100644 index 000000000..7cd35e27c --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-3.yaml @@ -0,0 +1,168 @@ +--- +uuid: 46202974-7c1f-49ec-b29f-e425a48e184b +task_id: home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip song on outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip song on outdoor speakers + context: + id: 01KHJQWSVVXYZDZG4QQX6JBYFQ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:28.795645+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:28.799302+00:00 + - role: user + content: Skip song on outdoor speakers + attachments: null + created: 2026-02-16 08:08:28.795714+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebf149d20>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:28.799313+00:00 + duration_ms: 27.325 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-0.yaml new file mode 100644 index 000000000..fc7f4912c --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-0.yaml @@ -0,0 +1,168 @@ +--- +uuid: 2721a876-d1bb-4c3a-b2d8-3547264e1a45 +task_id: home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip to the next track on the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip to the next track on the outdoor speakers + context: + id: 01KHJQWT1CNK9WV2Z5W5S4NT1B + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:28.972551+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:28.974797+00:00 + - role: user + content: Skip to the next track on the outdoor speakers + attachments: null + created: 2026-02-16 08:08:28.972621+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebf7a2e50>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:28.974807+00:00 + duration_ms: 25.73 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-1.yaml new file mode 100644 index 000000000..0b3d2379f --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-1.yaml @@ -0,0 +1,168 @@ +--- +uuid: c33f32c8-1e44-4a27-9ef3-745879498195 +task_id: home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip to the next track on the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip to the next track on the outdoor speakers + context: + id: 01KHJQWTMJE1QZT44T1207J96F + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:29.586375+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:29.587448+00:00 + - role: user + content: Skip to the next track on the outdoor speakers + attachments: null + created: 2026-02-16 08:08:29.586445+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebf29ef00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:29.587457+00:00 + duration_ms: 95.365 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-2.yaml new file mode 100644 index 000000000..d055e152d --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-2.yaml @@ -0,0 +1,168 @@ +--- +uuid: 4c613e1a-a660-43fe-bedd-61934cd6bc42 +task_id: home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip to the next track on the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip to the next track on the outdoor speakers + context: + id: 01KHJQWTX0GS1NZHRPTNM30HHB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:29.856291+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:29.859196+00:00 + - role: user + content: Skip to the next track on the outdoor speakers + attachments: null + created: 2026-02-16 08:08:29.856362+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ecc7dfcc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:29.859207+00:00 + duration_ms: 33.143 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-3.yaml new file mode 100644 index 000000000..0a46a1cb7 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-3.yaml @@ -0,0 +1,168 @@ +--- +uuid: eb1e2752-e04d-4182-ab05-1897097cc058 +task_id: home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip to the next track on the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip to the next track on the outdoor speakers + context: + id: 01KHJQWV2BM8PCSD6C16BQFG7T + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:30.028064+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:30.034338+00:00 + - role: user + content: Skip to the next track on the outdoor speakers + attachments: null + created: 2026-02-16 08:08:30.028171+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebda8e2a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:30.034350+00:00 + duration_ms: 28.164 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-0.yaml new file mode 100644 index 000000000..b0fbd16e7 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-0.yaml @@ -0,0 +1,167 @@ +--- +uuid: 8f5de3b3-c5df-4a6f-9f32-922c195562e9 +task_id: home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn off the Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: 'off' + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Turn off the Outdoor Speakers + context: + id: 01KHJQWXZQDDK1P7QEA3BH3Y4J + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:33.015781+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:33.019396+00:00 + - role: user + content: Turn off the Outdoor Speakers + attachments: null + created: 2026-02-16 08:08:33.015849+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebff90040>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:33.019407+00:00 + duration_ms: 99.971 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-1.yaml new file mode 100644 index 000000000..aacbaeaaa --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-1.yaml @@ -0,0 +1,167 @@ +--- +uuid: 68d31207-c38d-4162-bec7-1c9995e5963d +task_id: home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn off the Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: 'off' + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Turn off the Outdoor Speakers + context: + id: 01KHJQWY7VEW6X71WTP2P70RA7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:33.275261+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:33.276809+00:00 + - role: user + content: Turn off the Outdoor Speakers + attachments: null + created: 2026-02-16 08:08:33.275331+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebf75e350>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:33.276818+00:00 + duration_ms: 90.799 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-2.yaml new file mode 100644 index 000000000..64be8355c --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-2.yaml @@ -0,0 +1,167 @@ +--- +uuid: 8e75a783-4b95-4a59-8d1e-c5cf2ae248ba +task_id: home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn off the Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: 'off' + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Turn off the Outdoor Speakers + context: + id: 01KHJQWYFPJZEWZ8RPJRD66987 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:33.526334+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:33.528530+00:00 + - role: user + content: Turn off the Outdoor Speakers + attachments: null + created: 2026-02-16 08:08:33.526403+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebdcc8510>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:33.528539+00:00 + duration_ms: 26.184 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-3.yaml new file mode 100644 index 000000000..108f94f8c --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-3.yaml @@ -0,0 +1,167 @@ +--- +uuid: e55ee7aa-ecb3-4aeb-b24b-f0771f4991c5 +task_id: home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn off the Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: 'off' + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Turn off the Outdoor Speakers + context: + id: 01KHJQWYP30QFZ6A444Z5VFYT0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:33.731522+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:33.734048+00:00 + - role: user + content: Turn off the Outdoor Speakers + attachments: null + created: 2026-02-16 08:08:33.731593+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebfa556f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:33.734058+00:00 + duration_ms: 106.942 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_the_volume_down_to_50-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_the_volume_down_to_50-0.yaml new file mode 100644 index 000000000..55f94c2ee --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_the_volume_down_to_50-0.yaml @@ -0,0 +1,168 @@ +--- +uuid: 9c2478ef-9b8a-40c2-856d-b0d5a4752dcc +task_id: home7_dk_media_player_media_player-turn_the_volume_down_to_50-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn the volume down to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Turn the volume down to 50% + context: + id: 01KHJQWW1MC4TGSZAE0ZADEWF6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:31.028549+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:31.029677+00:00 + - role: user + content: Turn the volume down to 50% + attachments: null + created: 2026-02-16 08:08:31.028623+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebf110250>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:31.029687+00:00 + duration_ms: 136.263 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_the_volume_down_to_50-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_the_volume_down_to_50-1.yaml new file mode 100644 index 000000000..acf77bb0d --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_the_volume_down_to_50-1.yaml @@ -0,0 +1,168 @@ +--- +uuid: c3c1f88b-4c3c-40fa-9fe3-e2147a181457 +task_id: home7_dk_media_player_media_player-turn_the_volume_down_to_50-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn the volume down to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Turn the volume down to 50% + context: + id: 01KHJQWWAA197040DWWCN2CP1H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:31.306497+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:31.307642+00:00 + - role: user + content: Turn the volume down to 50% + attachments: null + created: 2026-02-16 08:08:31.306569+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebd6c7950>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:31.307651+00:00 + duration_ms: 26.887 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_the_volume_down_to_50-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_the_volume_down_to_50-2.yaml new file mode 100644 index 000000000..58810351d --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_the_volume_down_to_50-2.yaml @@ -0,0 +1,168 @@ +--- +uuid: f2760f7e-3558-4dc2-b3dc-d7156081f850 +task_id: home7_dk_media_player_media_player-turn_the_volume_down_to_50-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn the volume down to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Turn the volume down to 50% + context: + id: 01KHJQWWF612VQ5D42E9KJKP4Z + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:31.462385+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:31.463526+00:00 + - role: user + content: Turn the volume down to 50% + attachments: null + created: 2026-02-16 08:08:31.462454+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebd433270>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:31.463535+00:00 + duration_ms: 26.258 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_the_volume_down_to_50-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_the_volume_down_to_50-3.yaml new file mode 100644 index 000000000..1deafb942 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-turn_the_volume_down_to_50-3.yaml @@ -0,0 +1,168 @@ +--- +uuid: e1986ec0-9a2a-43c4-a20e-a68c83a56d5f +task_id: home7_dk_media_player_media_player-turn_the_volume_down_to_50-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn the volume down to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Turn the volume down to 50% + context: + id: 01KHJQWWMMRSMYZ0GGMAZTPRJG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:31.636807+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:31.637982+00:00 + - role: user + content: Turn the volume down to 50% + attachments: null + created: 2026-02-16 08:08:31.636878+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebd6f12d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:31.637992+00:00 + duration_ms: 97.654 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-0.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-0.yaml new file mode 100644 index 000000000..81562a4c1 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-0.yaml @@ -0,0 +1,167 @@ +--- +uuid: 1b83eae3-b7fc-4156-b04b-852a5a8fd398 +task_id: home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Unpause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Unpause the Rooftop Terrace music + context: + id: 01KHJQWR8AZ7Q8JP2NYFPC84J0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:27.146458+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:27.148417+00:00 + - role: user + content: Unpause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:08:27.146530+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ecccb1590>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:27.148426+00:00 + duration_ms: 144.488 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-1.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-1.yaml new file mode 100644 index 000000000..d7ab63495 --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-1.yaml @@ -0,0 +1,167 @@ +--- +uuid: ba447832-36fb-4f03-a831-6d7af9d6daf8 +task_id: home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Unpause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Unpause the Rooftop Terrace music + context: + id: 01KHJQWRHQKYH9RGM9D9AK3GC0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:27.447398+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:27.448539+00:00 + - role: user + content: Unpause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:08:27.447469+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ecc394460>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:27.448560+00:00 + duration_ms: 102.312 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-2.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-2.yaml new file mode 100644 index 000000000..ab4eb490f --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-2.yaml @@ -0,0 +1,167 @@ +--- +uuid: 3a1850c9-9373-4ffe-9efe-cd9b9f03df83 +task_id: home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Unpause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Unpause the Rooftop Terrace music + context: + id: 01KHJQWRTA2BQQSQY1MY5366SR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:27.722833+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:27.765199+00:00 + - role: user + content: Unpause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:08:27.722914+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebfa4eb90>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:27.765220+00:00 + duration_ms: 143.574 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-3.yaml b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-3.yaml new file mode 100644 index 000000000..a4624c22e --- /dev/null +++ b/reports/assist-mini/2026.2.2/gemma-3-27b-it/home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-3.yaml @@ -0,0 +1,167 @@ +--- +uuid: eccbaefb-5390-4c5c-bc51-50b18afaf505 +task_id: home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Unpause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Unpause the Rooftop Terrace music + context: + id: 01KHJQWS435FPDHF4MRBCM380G + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:28.035248+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + created: 2026-02-16 08:08:28.036384+00:00 + - role: user + content: Unpause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:08:28.035318+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f5ebf2e45c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f5edc749010>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''identify'', ''restart'', ''update'', ''tv'', + ''speaker'', ''receiver'', ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:28.036393+00:00 + duration_ms: 31.274 + tries: 1 diff --git a/reports/assist-mini/2026.2.2/report.csv b/reports/assist-mini/2026.2.2/report.csv new file mode 100644 index 000000000..40c72ad87 --- /dev/null +++ b/reports/assist-mini/2026.2.2/report.csv @@ -0,0 +1,197 @@ +task_id,model_id,category,text,tool_call,response,task_name,label,details +"dom1_pl_lights_lights-dining_room_light_off-0","gemma-3-27b-it","light","Dining room light off","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.dining_room_light': {'expected': {'brightness': None, 'color_mode': None, 'state': 'off'}, 'got': {'brightness': 100, 'color_mode': 'brightness', 'state': 'on'}}}" +"dom1_pl_lights_lights-dining_room_light_off-1","gemma-3-27b-it","light","Dining room light off","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.dining_room_light': {'expected': {'brightness': None, 'color_mode': None, 'state': 'off'}, 'got': {'brightness': 100, 'color_mode': 'brightness', 'state': 'on'}}}" +"dom1_pl_lights_lights-dining_room_light_off-2","gemma-3-27b-it","light","Dining room light off","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.dining_room_light': {'expected': {'brightness': None, 'color_mode': None, 'state': 'off'}, 'got': {'brightness': 100, 'color_mode': 'brightness', 'state': 'on'}}}" +"dom1_pl_lights_lights-dining_room_light_off-3","gemma-3-27b-it","light","Dining room light off","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.dining_room_light': {'expected': {'brightness': None, 'color_mode': None, 'state': 'off'}, 'got': {'brightness': 100, 'color_mode': 'brightness', 'state': 'on'}}}" +"dom1_pl_lights_lights-kitchen_light_off-0","gemma-3-27b-it","light","Kitchen light off","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'brightness': None, 'color_mode': None, 'state': 'off'}, 'got': {'brightness': 100, 'color_mode': 'brightness', 'state': 'on'}}}" +"dom1_pl_lights_lights-kitchen_light_off-1","gemma-3-27b-it","light","Kitchen light off","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'brightness': None, 'color_mode': None, 'state': 'off'}, 'got': {'brightness': 100, 'color_mode': 'brightness', 'state': 'on'}}}" +"dom1_pl_lights_lights-kitchen_light_off-2","gemma-3-27b-it","light","Kitchen light off","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'brightness': None, 'color_mode': None, 'state': 'off'}, 'got': {'brightness': 100, 'color_mode': 'brightness', 'state': 'on'}}}" +"dom1_pl_lights_lights-kitchen_light_off-3","gemma-3-27b-it","light","Kitchen light off","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'brightness': None, 'color_mode': None, 'state': 'off'}, 'got': {'brightness': 100, 'color_mode': 'brightness', 'state': 'on'}}}" +"dom1_pl_lights_lights-please_turn_on_the_kitchen_light-0","gemma-3-27b-it","light","Please turn on the kitchen light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'brightness': 0, 'color_mode': 'brightness', 'state': 'on'}, 'got': {'brightness': None, 'color_mode': None, 'state': 'off'}}}" +"dom1_pl_lights_lights-please_turn_on_the_kitchen_light-1","gemma-3-27b-it","light","Please turn on the kitchen light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'brightness': 0, 'color_mode': 'brightness', 'state': 'on'}, 'got': {'brightness': None, 'color_mode': None, 'state': 'off'}}}" +"dom1_pl_lights_lights-please_turn_on_the_kitchen_light-2","gemma-3-27b-it","light","Please turn on the kitchen light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'brightness': 0, 'color_mode': 'brightness', 'state': 'on'}, 'got': {'brightness': None, 'color_mode': None, 'state': 'off'}}}" +"dom1_pl_lights_lights-please_turn_on_the_kitchen_light-3","gemma-3-27b-it","light","Please turn on the kitchen light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'brightness': 0, 'color_mode': 'brightness', 'state': 'on'}, 'got': {'brightness': None, 'color_mode': None, 'state': 'off'}}}" +"dom1_pl_lights_lights-turn_on_the_backyard_light-0","gemma-3-27b-it","light","Turn on the Backyard light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.garden_light': {'expected': {'brightness': 0, 'color_mode': 'brightness', 'state': 'on'}, 'got': {'brightness': None, 'color_mode': None, 'state': 'off'}}}" +"dom1_pl_lights_lights-turn_on_the_backyard_light-1","gemma-3-27b-it","light","Turn on the Backyard light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.garden_light': {'expected': {'brightness': 0, 'color_mode': 'brightness', 'state': 'on'}, 'got': {'brightness': None, 'color_mode': None, 'state': 'off'}}}" +"dom1_pl_lights_lights-turn_on_the_backyard_light-2","gemma-3-27b-it","light","Turn on the Backyard light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.garden_light': {'expected': {'brightness': 0, 'color_mode': 'brightness', 'state': 'on'}, 'got': {'brightness': None, 'color_mode': None, 'state': 'off'}}}" +"dom1_pl_lights_lights-turn_on_the_backyard_light-3","gemma-3-27b-it","light","Turn on the Backyard light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.garden_light': {'expected': {'brightness': 0, 'color_mode': 'brightness', 'state': 'on'}, 'got': {'brightness': None, 'color_mode': None, 'state': 'off'}}}" +"dom1_pl_lights_lights-turn_on_the_living_room_light-0","gemma-3-27b-it","light","Turn on the living room light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'brightness': 0, 'color_mode': 'brightness', 'state': 'on'}, 'got': {'brightness': None, 'color_mode': None, 'state': 'off'}}}" +"dom1_pl_lights_lights-turn_on_the_living_room_light-1","gemma-3-27b-it","light","Turn on the living room light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'brightness': 0, 'color_mode': 'brightness', 'state': 'on'}, 'got': {'brightness': None, 'color_mode': None, 'state': 'off'}}}" +"dom1_pl_lights_lights-turn_on_the_living_room_light-2","gemma-3-27b-it","light","Turn on the living room light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'brightness': 0, 'color_mode': 'brightness', 'state': 'on'}, 'got': {'brightness': None, 'color_mode': None, 'state': 'off'}}}" +"dom1_pl_lights_lights-turn_on_the_living_room_light-3","gemma-3-27b-it","light","Turn on the living room light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'brightness': 0, 'color_mode': 'brightness', 'state': 'on'}, 'got': {'brightness': None, 'color_mode': None, 'state': 'off'}}}" +"dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-0","gemma-3-27b-it","todo","Add clean the kitchen to my todo list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-1","gemma-3-27b-it","todo","Add clean the kitchen to my todo list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-2","gemma-3-27b-it","todo","Add clean the kitchen to my todo list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-3","gemma-3-27b-it","todo","Add clean the kitchen to my todo list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-0","gemma-3-27b-it","todo","Add history homework to my personal tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-1","gemma-3-27b-it","todo","Add history homework to my personal tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-2","gemma-3-27b-it","todo","Add history homework to my personal tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-3","gemma-3-27b-it","todo","Add history homework to my personal tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo_todo-add_history_homework_to_my_tasks-0","gemma-3-27b-it","todo","Add history homework to my tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo_todo-add_history_homework_to_my_tasks-1","gemma-3-27b-it","todo","Add history homework to my tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo_todo-add_history_homework_to_my_tasks-2","gemma-3-27b-it","todo","Add history homework to my tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo_todo-add_history_homework_to_my_tasks-3","gemma-3-27b-it","todo","Add history homework to my tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-0","gemma-3-27b-it","todo","Put history homework on personal tasks todo list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-1","gemma-3-27b-it","todo","Put history homework on personal tasks todo list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-2","gemma-3-27b-it","todo","Put history homework on personal tasks todo list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-3","gemma-3-27b-it","todo","Put history homework on personal tasks todo list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"home1_us_cover_garage_cover_garage-close_the_garage_door-0","gemma-3-27b-it","cover","Close the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home1_us_cover_garage_cover_garage-close_the_garage_door-1","gemma-3-27b-it","cover","Close the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home1_us_cover_garage_cover_garage-close_the_garage_door-2","gemma-3-27b-it","cover","Close the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home1_us_cover_garage_cover_garage-close_the_garage_door-3","gemma-3-27b-it","cover","Close the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home1_us_cover_garage_cover_garage-open_the_garage_door-0","gemma-3-27b-it","cover","Open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_cover_garage_cover_garage-open_the_garage_door-1","gemma-3-27b-it","cover","Open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_cover_garage_cover_garage-open_the_garage_door-2","gemma-3-27b-it","cover","Open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_cover_garage_cover_garage-open_the_garage_door-3","gemma-3-27b-it","cover","Open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_cover_garage_cover_garage-please_close_the_garage_door-0","gemma-3-27b-it","cover","Please close the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home1_us_cover_garage_cover_garage-please_close_the_garage_door-1","gemma-3-27b-it","cover","Please close the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home1_us_cover_garage_cover_garage-please_close_the_garage_door-2","gemma-3-27b-it","cover","Please close the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home1_us_cover_garage_cover_garage-please_close_the_garage_door-3","gemma-3-27b-it","cover","Please close the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home1_us_cover_garage_cover_garage-please_open_the_garage_door-0","gemma-3-27b-it","cover","Please open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_cover_garage_cover_garage-please_open_the_garage_door-1","gemma-3-27b-it","cover","Please open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_cover_garage_cover_garage-please_open_the_garage_door-2","gemma-3-27b-it","cover","Please open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_cover_garage_cover_garage-please_open_the_garage_door-3","gemma-3-27b-it","cover","Please open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_lock_smart_lock-lock_smart_lock-0","gemma-3-27b-it","lock","Lock smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_lock_smart_lock-lock_smart_lock-1","gemma-3-27b-it","lock","Lock smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_lock_smart_lock-lock_smart_lock-2","gemma-3-27b-it","lock","Lock smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_lock_smart_lock-lock_smart_lock-3","gemma-3-27b-it","lock","Lock smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_lock_smart_lock-lock_the_entry_lock-0","gemma-3-27b-it","lock","Lock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_lock_smart_lock-lock_the_entry_lock-1","gemma-3-27b-it","lock","Lock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_lock_smart_lock-lock_the_entry_lock-2","gemma-3-27b-it","lock","Lock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_lock_smart_lock-lock_the_entry_lock-3","gemma-3-27b-it","lock","Lock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_lock_smart_lock-unlock_smart_lock-0","gemma-3-27b-it","lock","Unlock smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_lock_smart_lock-unlock_smart_lock-1","gemma-3-27b-it","lock","Unlock smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_lock_smart_lock-unlock_smart_lock-2","gemma-3-27b-it","lock","Unlock smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_lock_smart_lock-unlock_smart_lock-3","gemma-3-27b-it","lock","Unlock smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_lock_smart_lock-unlock_the_entry_lock-0","gemma-3-27b-it","lock","Unlock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_lock_smart_lock-unlock_the_entry_lock-1","gemma-3-27b-it","lock","Unlock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_lock_smart_lock-unlock_the_entry_lock-2","gemma-3-27b-it","lock","Unlock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_lock_smart_lock-unlock_the_entry_lock-3","gemma-3-27b-it","lock","Unlock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-0","gemma-3-27b-it","vacuum","Return Roborock Downstairs to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-1","gemma-3-27b-it","vacuum","Return Roborock Downstairs to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-2","gemma-3-27b-it","vacuum","Return Roborock Downstairs to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-3","gemma-3-27b-it","vacuum","Return Roborock Downstairs to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-0","gemma-3-27b-it","vacuum","Return vacuum in the living room to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-1","gemma-3-27b-it","vacuum","Return vacuum in the living room to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-2","gemma-3-27b-it","vacuum","Return vacuum in the living room to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-3","gemma-3-27b-it","vacuum","Return vacuum in the living room to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-0","gemma-3-27b-it","vacuum","Start Roborock Downstairs vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-1","gemma-3-27b-it","vacuum","Start Roborock Downstairs vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-2","gemma-3-27b-it","vacuum","Start Roborock Downstairs vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-3","gemma-3-27b-it","vacuum","Start Roborock Downstairs vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-0","gemma-3-27b-it","vacuum","Start vacuum in the living room","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-1","gemma-3-27b-it","vacuum","Start vacuum in the living room","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-2","gemma-3-27b-it","vacuum","Start vacuum in the living room","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-3","gemma-3-27b-it","vacuum","Start vacuum in the living room","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home2_ru_valve_water_valve-close_the_front_yard_valve-0","gemma-3-27b-it","valve","close the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_valve_water_valve-close_the_front_yard_valve-1","gemma-3-27b-it","valve","close the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_valve_water_valve-close_the_front_yard_valve-2","gemma-3-27b-it","valve","close the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_valve_water_valve-close_the_front_yard_valve-3","gemma-3-27b-it","valve","close the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_valve_water_valve-close_the_irrigation_valve-0","gemma-3-27b-it","valve","close the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_valve_water_valve-close_the_irrigation_valve-1","gemma-3-27b-it","valve","close the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_valve_water_valve-close_the_irrigation_valve-2","gemma-3-27b-it","valve","close the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_valve_water_valve-close_the_irrigation_valve-3","gemma-3-27b-it","valve","close the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_valve_water_valve-open_the_front_yard_valve-0","gemma-3-27b-it","valve","open the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_valve_water_valve-open_the_front_yard_valve-1","gemma-3-27b-it","valve","open the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_valve_water_valve-open_the_front_yard_valve-2","gemma-3-27b-it","valve","open the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_valve_water_valve-open_the_front_yard_valve-3","gemma-3-27b-it","valve","open the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_valve_water_valve-open_the_irrigation_valve-0","gemma-3-27b-it","valve","open the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_valve_water_valve-open_the_irrigation_valve-1","gemma-3-27b-it","valve","open the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_valve_water_valve-open_the_irrigation_valve-2","gemma-3-27b-it","valve","open the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_valve_water_valve-open_the_irrigation_valve-3","gemma-3-27b-it","valve","open the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-0","gemma-3-27b-it","valve","set the irrigation valve to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 50, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-1","gemma-3-27b-it","valve","set the irrigation valve to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 50, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-2","gemma-3-27b-it","valve","set the irrigation valve to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 50, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-3","gemma-3-27b-it","valve","set the irrigation valve to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 50, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home5_cn_fan_fan-turn_off_the_bedroom_1_fan-0","gemma-3-27b-it","fan","Turn off the bedroom 1 fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan_fan-turn_off_the_bedroom_1_fan-1","gemma-3-27b-it","fan","Turn off the bedroom 1 fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan_fan-turn_off_the_bedroom_1_fan-2","gemma-3-27b-it","fan","Turn off the bedroom 1 fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan_fan-turn_off_the_bedroom_1_fan-3","gemma-3-27b-it","fan","Turn off the bedroom 1 fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan_fan-turn_off_the_bedroom_fan-0","gemma-3-27b-it","fan","Turn off the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan_fan-turn_off_the_bedroom_fan-1","gemma-3-27b-it","fan","Turn off the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan_fan-turn_off_the_bedroom_fan-2","gemma-3-27b-it","fan","Turn off the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan_fan-turn_off_the_bedroom_fan-3","gemma-3-27b-it","fan","Turn off the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan_fan-turn_on_the_bedroom_1_fan-0","gemma-3-27b-it","fan","Turn on the bedroom 1 fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan_fan-turn_on_the_bedroom_1_fan-1","gemma-3-27b-it","fan","Turn on the bedroom 1 fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan_fan-turn_on_the_bedroom_1_fan-2","gemma-3-27b-it","fan","Turn on the bedroom 1 fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan_fan-turn_on_the_bedroom_1_fan-3","gemma-3-27b-it","fan","Turn on the bedroom 1 fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan_fan-turn_on_the_bedroom_fan-0","gemma-3-27b-it","fan","Turn on the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan_fan-turn_on_the_bedroom_fan-1","gemma-3-27b-it","fan","Turn on the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan_fan-turn_on_the_bedroom_fan-2","gemma-3-27b-it","fan","Turn on the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan_fan-turn_on_the_bedroom_fan-3","gemma-3-27b-it","fan","Turn on the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_light_light-set_the_living_room_light_to_50_brightness-0","gemma-3-27b-it","light","Set the living room light to 50% brightness","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'brightness': 128}, 'got': {'brightness': 255}}}" +"home5_cn_light_light-set_the_living_room_light_to_50_brightness-1","gemma-3-27b-it","light","Set the living room light to 50% brightness","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'brightness': 128}, 'got': {'brightness': 255}}}" +"home5_cn_light_light-set_the_living_room_light_to_50_brightness-2","gemma-3-27b-it","light","Set the living room light to 50% brightness","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'brightness': 128}, 'got': {'brightness': 255}}}" +"home5_cn_light_light-set_the_living_room_light_to_50_brightness-3","gemma-3-27b-it","light","Set the living room light to 50% brightness","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'brightness': 128}, 'got': {'brightness': 255}}}" +"home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-0","gemma-3-27b-it","cover","Close the living room curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-1","gemma-3-27b-it","cover","Close the living room curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-2","gemma-3-27b-it","cover","Close the living room curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-3","gemma-3-27b-it","cover","Close the living room curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-0","gemma-3-27b-it","cover","Close the living room smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-1","gemma-3-27b-it","cover","Close the living room smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-2","gemma-3-27b-it","cover","Close the living room smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-3","gemma-3-27b-it","cover","Close the living room smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-0","gemma-3-27b-it","cover","Close the smart curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-1","gemma-3-27b-it","cover","Close the smart curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-2","gemma-3-27b-it","cover","Close the smart curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-3","gemma-3-27b-it","cover","Close the smart curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-0","gemma-3-27b-it","cover","Open the living room curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-1","gemma-3-27b-it","cover","Open the living room curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-2","gemma-3-27b-it","cover","Open the living room curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-3","gemma-3-27b-it","cover","Open the living room curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-0","gemma-3-27b-it","cover","Open the living room smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-1","gemma-3-27b-it","cover","Open the living room smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-2","gemma-3-27b-it","cover","Open the living room smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-3","gemma-3-27b-it","cover","Open the living room smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-0","gemma-3-27b-it","cover","Open the smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-1","gemma-3-27b-it","cover","Open the smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-2","gemma-3-27b-it","cover","Open the smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-3","gemma-3-27b-it","cover","Open the smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-0","gemma-3-27b-it","cover","Set the living room curtains to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-1","gemma-3-27b-it","cover","Set the living room curtains to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-2","gemma-3-27b-it","cover","Set the living room curtains to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-3","gemma-3-27b-it","cover","Set the living room curtains to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-0","gemma-3-27b-it","cover","Set the smart curtain to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-1","gemma-3-27b-it","cover","Set the smart curtain to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-2","gemma-3-27b-it","cover","Set the smart curtain to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-3","gemma-3-27b-it","cover","Set the smart curtain to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_media_player_media_player-mute_the_outdoor_speakers-0","gemma-3-27b-it","media-player","Mute the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.0}, 'got': {'volume_level': 0.6}}}" +"home7_dk_media_player_media_player-mute_the_outdoor_speakers-1","gemma-3-27b-it","media-player","Mute the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.0}, 'got': {'volume_level': 0.6}}}" +"home7_dk_media_player_media_player-mute_the_outdoor_speakers-2","gemma-3-27b-it","media-player","Mute the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.0}, 'got': {'volume_level': 0.6}}}" +"home7_dk_media_player_media_player-mute_the_outdoor_speakers-3","gemma-3-27b-it","media-player","Mute the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.0}, 'got': {'volume_level': 0.6}}}" +"home7_dk_media_player_media_player-pause_outdoor_speakers-0","gemma-3-27b-it","media-player","Pause Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player_media_player-pause_outdoor_speakers-1","gemma-3-27b-it","media-player","Pause Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player_media_player-pause_outdoor_speakers-2","gemma-3-27b-it","media-player","Pause Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player_media_player-pause_outdoor_speakers-3","gemma-3-27b-it","media-player","Pause Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-0","gemma-3-27b-it","media-player","Pause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-1","gemma-3-27b-it","media-player","Pause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-2","gemma-3-27b-it","media-player","Pause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-3","gemma-3-27b-it","media-player","Pause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player_media_player-resume_outdoor_speakers-0","gemma-3-27b-it","media-player","Resume Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player_media_player-resume_outdoor_speakers-1","gemma-3-27b-it","media-player","Resume Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player_media_player-resume_outdoor_speakers-2","gemma-3-27b-it","media-player","Resume Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player_media_player-resume_outdoor_speakers-3","gemma-3-27b-it","media-player","Resume Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-0","gemma-3-27b-it","media-player","Set outdoor speakers volume to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-1","gemma-3-27b-it","media-player","Set outdoor speakers volume to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-2","gemma-3-27b-it","media-player","Set outdoor speakers volume to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-3","gemma-3-27b-it","media-player","Set outdoor speakers volume to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-0","gemma-3-27b-it","media-player","Skip song on outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-1","gemma-3-27b-it","media-player","Skip song on outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-2","gemma-3-27b-it","media-player","Skip song on outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-3","gemma-3-27b-it","media-player","Skip song on outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-0","gemma-3-27b-it","media-player","Skip to the next track on the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-1","gemma-3-27b-it","media-player","Skip to the next track on the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-2","gemma-3-27b-it","media-player","Skip to the next track on the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-3","gemma-3-27b-it","media-player","Skip to the next track on the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-0","gemma-3-27b-it","media-player","Turn off the Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'off'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-1","gemma-3-27b-it","media-player","Turn off the Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'off'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-2","gemma-3-27b-it","media-player","Turn off the Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'off'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-3","gemma-3-27b-it","media-player","Turn off the Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'off'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player_media_player-turn_the_volume_down_to_50-0","gemma-3-27b-it","media-player","Turn the volume down to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player_media_player-turn_the_volume_down_to_50-1","gemma-3-27b-it","media-player","Turn the volume down to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player_media_player-turn_the_volume_down_to_50-2","gemma-3-27b-it","media-player","Turn the volume down to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player_media_player-turn_the_volume_down_to_50-3","gemma-3-27b-it","media-player","Turn the volume down to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-0","gemma-3-27b-it","media-player","Unpause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-1","gemma-3-27b-it","media-player","Unpause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-2","gemma-3-27b-it","media-player","Unpause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-3","gemma-3-27b-it","media-player","Unpause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" diff --git a/reports/assist-mini/2026.2.2/reports-by-category.yaml b/reports/assist-mini/2026.2.2/reports-by-category.yaml new file mode 100644 index 000000000..f036596c6 --- /dev/null +++ b/reports/assist-mini/2026.2.2/reports-by-category.yaml @@ -0,0 +1,42 @@ +--- +- category: cover + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 48 +- category: fan + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 16 +- category: light + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 24 +- category: lock + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 16 +- category: media-player + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 40 +- category: todo + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 16 +- category: vacuum + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 16 +- category: valve + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 20 + diff --git a/reports/assist-mini/2026.2.2/reports-by-model-category.yaml b/reports/assist-mini/2026.2.2/reports-by-model-category.yaml new file mode 100644 index 000000000..1c3762973 --- /dev/null +++ b/reports/assist-mini/2026.2.2/reports-by-model-category.yaml @@ -0,0 +1,42 @@ +--- +- model_id-category: gemma-3-27b-it-cover + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 48 +- model_id-category: gemma-3-27b-it-fan + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 16 +- model_id-category: gemma-3-27b-it-light + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 24 +- model_id-category: gemma-3-27b-it-lock + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 16 +- model_id-category: gemma-3-27b-it-media-player + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 40 +- model_id-category: gemma-3-27b-it-todo + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 16 +- model_id-category: gemma-3-27b-it-vacuum + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 16 +- model_id-category: gemma-3-27b-it-valve + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 20 + diff --git a/reports/assist-mini/2026.2.2/reports-by-model-test-name.yaml b/reports/assist-mini/2026.2.2/reports-by-model-test-name.yaml new file mode 100644 index 000000000..c7cfdda65 --- /dev/null +++ b/reports/assist-mini/2026.2.2/reports-by-model-test-name.yaml @@ -0,0 +1,22 @@ +--- +- model_id-task_name: gemma-3-27b-it-eval-test_expect_llm_tool_call_args + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- model_id-task_name: gemma-3-27b-it-eval-test_expect_llm_tool_call_name + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- model_id-task_name: gemma-3-27b-it-eval-test_expect_response + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- model_id-task_name: gemma-3-27b-it-eval-test_expected_states + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 196 + diff --git a/reports/assist-mini/2026.2.2/reports-by-task-id.yaml b/reports/assist-mini/2026.2.2/reports-by-task-id.yaml new file mode 100644 index 000000000..d0c541502 --- /dev/null +++ b/reports/assist-mini/2026.2.2/reports-by-task-id.yaml @@ -0,0 +1,982 @@ +--- +- task_id: dom1_pl_lights_lights-dining_room_light_off-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-dining_room_light_off-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-dining_room_light_off-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-dining_room_light_off-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-kitchen_light_off-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-kitchen_light_off-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-kitchen_light_off-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-kitchen_light_off-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-please_turn_on_the_kitchen_light-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-please_turn_on_the_kitchen_light-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-please_turn_on_the_kitchen_light-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-please_turn_on_the_kitchen_light-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-turn_on_the_backyard_light-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-turn_on_the_backyard_light-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-turn_on_the_backyard_light-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-turn_on_the_backyard_light-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-turn_on_the_living_room_light-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-turn_on_the_living_room_light-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-turn_on_the_living_room_light-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights_lights-turn_on_the_living_room_light-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo_todo-add_clean_the_kitchen_to_my_todo_list-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo_todo-add_history_homework_to_my_personal_tasks-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo_todo-add_history_homework_to_my_tasks-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo_todo-add_history_homework_to_my_tasks-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo_todo-add_history_homework_to_my_tasks-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo_todo-add_history_homework_to_my_tasks-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo_todo-put_history_homework_on_personal_tasks_todo_list-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage_cover_garage-close_the_garage_door-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage_cover_garage-close_the_garage_door-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage_cover_garage-close_the_garage_door-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage_cover_garage-close_the_garage_door-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage_cover_garage-open_the_garage_door-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage_cover_garage-open_the_garage_door-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage_cover_garage-open_the_garage_door-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage_cover_garage-open_the_garage_door-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage_cover_garage-please_close_the_garage_door-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage_cover_garage-please_close_the_garage_door-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage_cover_garage-please_close_the_garage_door-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage_cover_garage-please_close_the_garage_door-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage_cover_garage-please_open_the_garage_door-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage_cover_garage-please_open_the_garage_door-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage_cover_garage-please_open_the_garage_door-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage_cover_garage-please_open_the_garage_door-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_lock_smart_lock-lock_smart_lock-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_lock_smart_lock-lock_smart_lock-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_lock_smart_lock-lock_smart_lock-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_lock_smart_lock-lock_smart_lock-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_lock_smart_lock-lock_the_entry_lock-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_lock_smart_lock-lock_the_entry_lock-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_lock_smart_lock-lock_the_entry_lock-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_lock_smart_lock-lock_the_entry_lock-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_lock_smart_lock-unlock_smart_lock-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_lock_smart_lock-unlock_smart_lock-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_lock_smart_lock-unlock_smart_lock-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_lock_smart_lock-unlock_smart_lock-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_lock_smart_lock-unlock_the_entry_lock-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_lock_smart_lock-unlock_the_entry_lock-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_lock_smart_lock-unlock_the_entry_lock-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_lock_smart_lock-unlock_the_entry_lock-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum_vacuum-return_roborock_downstairs_to_base-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum_vacuum-return_vacuum_in_the_living_room_to_base-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum_vacuum-start_vacuum_in_the_living_room-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-close_the_front_yard_valve-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-close_the_front_yard_valve-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-close_the_front_yard_valve-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-close_the_front_yard_valve-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-close_the_irrigation_valve-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-close_the_irrigation_valve-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-close_the_irrigation_valve-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-close_the_irrigation_valve-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-open_the_front_yard_valve-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-open_the_front_yard_valve-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-open_the_front_yard_valve-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-open_the_front_yard_valve-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-open_the_irrigation_valve-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-open_the_irrigation_valve-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-open_the_irrigation_valve-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-open_the_irrigation_valve-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_valve_water_valve-set_the_irrigation_valve_to_50-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan_fan-turn_off_the_bedroom_1_fan-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan_fan-turn_off_the_bedroom_1_fan-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan_fan-turn_off_the_bedroom_1_fan-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan_fan-turn_off_the_bedroom_1_fan-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan_fan-turn_off_the_bedroom_fan-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan_fan-turn_off_the_bedroom_fan-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan_fan-turn_off_the_bedroom_fan-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan_fan-turn_off_the_bedroom_fan-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan_fan-turn_on_the_bedroom_1_fan-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan_fan-turn_on_the_bedroom_1_fan-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan_fan-turn_on_the_bedroom_1_fan-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan_fan-turn_on_the_bedroom_1_fan-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan_fan-turn_on_the_bedroom_fan-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan_fan-turn_on_the_bedroom_fan-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan_fan-turn_on_the_bedroom_fan-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan_fan-turn_on_the_bedroom_fan-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_light_light-set_the_living_room_light_to_50_brightness-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_light_light-set_the_living_room_light_to_50_brightness-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_light_light-set_the_living_room_light_to_50_brightness-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_light_light-set_the_living_room_light_to_50_brightness-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-close_the_living_room_smart_curtain-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-close_the_smart_curtains-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-open_the_living_room_smart_curtain-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-open_the_smart_curtain-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-set_the_living_room_curtains_to_50-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain_cover_curtain-set_the_smart_curtain_to_50-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-mute_the_outdoor_speakers-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-mute_the_outdoor_speakers-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-mute_the_outdoor_speakers-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-mute_the_outdoor_speakers-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-pause_outdoor_speakers-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-pause_outdoor_speakers-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-pause_outdoor_speakers-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-pause_outdoor_speakers-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-pause_the_rooftop_terrace_music-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-resume_outdoor_speakers-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-resume_outdoor_speakers-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-resume_outdoor_speakers-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-resume_outdoor_speakers-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-skip_song_on_outdoor_speakers-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-skip_to_the_next_track_on_the_outdoor_speakers-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-turn_off_the_outdoor_speakers-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-turn_the_volume_down_to_50-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-turn_the_volume_down_to_50-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-turn_the_volume_down_to_50-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-turn_the_volume_down_to_50-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 + diff --git a/reports/assist-mini/2026.2.2/reports-by-test-name.yaml b/reports/assist-mini/2026.2.2/reports-by-test-name.yaml new file mode 100644 index 000000000..7b4e74408 --- /dev/null +++ b/reports/assist-mini/2026.2.2/reports-by-test-name.yaml @@ -0,0 +1,22 @@ +--- +- task_name: eval-test_expect_llm_tool_call_args + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- task_name: eval-test_expect_llm_tool_call_name + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- task_name: eval-test_expect_response + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- task_name: eval-test_expected_states + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 196 + diff --git a/reports/assist-mini/2026.2.2/reports-token-stats.yaml b/reports/assist-mini/2026.2.2/reports-token-stats.yaml new file mode 100644 index 000000000..63b69c89f --- /dev/null +++ b/reports/assist-mini/2026.2.2/reports-token-stats.yaml @@ -0,0 +1,2 @@ +--- [] + diff --git a/reports/assist-mini/2026.2.2/reports.yaml b/reports/assist-mini/2026.2.2/reports.yaml new file mode 100644 index 000000000..d4a809459 --- /dev/null +++ b/reports/assist-mini/2026.2.2/reports.yaml @@ -0,0 +1,7 @@ +--- +- model_id: gemma-3-27b-it + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 196 + diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/_scrape_context.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/_scrape_context.yaml new file mode 100644 index 000000000..56b6f7efe --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/_scrape_context.yaml @@ -0,0 +1,19 @@ +--- +uuid: 1d9bdd50-5c28-41fd-9164-73bf83d8d371 +timestamp: 2026-02-16 08:07:42.929729 +scrape_config: + dataset: assist + dataset_path: datasets/assist + dataset_version: v2 + model_id: gemma-3-27b-it + model_output_path: reports/assist/2026.2.2 +version: 2026.2.2 +context: + user: runner + argv: + - /home/runner/work/openrouter-benchmarks/openrouter-benchmarks/.venv/bin/pytest + - home_assistant_datasets/tool/assist/collect + - --models=gemma-3-27b-it + - --dataset=datasets/assist/ + - --model_output_dir=reports/assist/2026.2.2 +notes: '' diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-kitchen_light_off-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-kitchen_light_off-0.yaml new file mode 100644 index 000000000..f23d8ce72 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-kitchen_light_off-0.yaml @@ -0,0 +1,294 @@ +--- +uuid: c6d49669-503f-43a0-aa7e-e70c68242777 +task_id: dom1_pl_lights-kitchen_light_off-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Kitchen light off + expect_changes: + light.kitchen_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Kitchen light off + context: + id: 01KHJQVGY14YG0248Y3NWMRCHK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:46.881136+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:46.882906+00:00 + - role: user + content: Kitchen light off + attachments: null + created: 2026-02-16 08:07:46.881212+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d8690c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:46.882918+00:00 + duration_ms: 160.887 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-kitchen_light_off-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-kitchen_light_off-1.yaml new file mode 100644 index 000000000..6b0021a4c --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-kitchen_light_off-1.yaml @@ -0,0 +1,294 @@ +--- +uuid: f3c5449d-7d3a-4b79-8544-c66a401902d3 +task_id: dom1_pl_lights-kitchen_light_off-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Kitchen light off + expect_changes: + light.kitchen_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Kitchen light off + context: + id: 01KHJQVHB0BR2W68KXA9PY4QE9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:47.296289+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:47.301661+00:00 + - role: user + content: Kitchen light off + attachments: null + created: 2026-02-16 08:07:47.296374+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d636770>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:47.301676+00:00 + duration_ms: 150.772 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-kitchen_light_off-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-kitchen_light_off-2.yaml new file mode 100644 index 000000000..d9c0de7af --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-kitchen_light_off-2.yaml @@ -0,0 +1,294 @@ +--- +uuid: 44313ef7-9f02-428a-ba76-dc0d72256035 +task_id: dom1_pl_lights-kitchen_light_off-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Kitchen light off + expect_changes: + light.kitchen_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Kitchen light off + context: + id: 01KHJQVHRA082ARSERY3KF487X + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:47.722843+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:47.724606+00:00 + - role: user + content: Kitchen light off + attachments: null + created: 2026-02-16 08:07:47.722919+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17da78d50>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:47.724620+00:00 + duration_ms: 149.576 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-kitchen_light_off-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-kitchen_light_off-3.yaml new file mode 100644 index 000000000..a4e41a820 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-kitchen_light_off-3.yaml @@ -0,0 +1,294 @@ +--- +uuid: 946faad5-3067-4a87-9946-7d4a899f68d0 +task_id: dom1_pl_lights-kitchen_light_off-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Kitchen light off + expect_changes: + light.kitchen_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Kitchen light off + context: + id: 01KHJQVJ5BQ1VC55P1CMB9D8MJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:48.139705+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:48.142899+00:00 + - role: user + content: Kitchen light off + attachments: null + created: 2026-02-16 08:07:48.139783+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17de56820>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:48.142913+00:00 + duration_ms: 152.195 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-kitchen_light_off-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-kitchen_light_off-4.yaml new file mode 100644 index 000000000..3e070cd40 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-kitchen_light_off-4.yaml @@ -0,0 +1,294 @@ +--- +uuid: 23a35de5-278c-4621-ac0c-ff0225debe38 +task_id: dom1_pl_lights-kitchen_light_off-4 +model_id: gemma-3-27b-it +category: light +task: + input_text: Kitchen light off + expect_changes: + light.kitchen_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Kitchen light off + context: + id: 01KHJQVJQXTF39F5DMG1WDSZ96 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:48.733671+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:48.735456+00:00 + - role: user + content: Kitchen light off + attachments: null + created: 2026-02-16 08:07:48.733747+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d3328d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:48.735470+00:00 + duration_ms: 161.653 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_off_the_light-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_off_the_light-0.yaml new file mode 100644 index 000000000..b80c83249 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_off_the_light-0.yaml @@ -0,0 +1,294 @@ +--- +uuid: 958cb956-7be9-4ccd-a466-ad9625487f16 +task_id: dom1_pl_lights-please_turn_off_the_light-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Please turn off the light + expect_changes: + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Please turn off the light + context: + id: 01KHJQVK550EDJEHN5G57GEZ6N + parent_id: null + user_id: null + conversation_id: null + device_id: 09746e72bdb0059c11ae0f488c38dd3a + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:49.157937+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room (floor Ground) and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:49.159773+00:00 + - role: user + content: Please turn off the light + attachments: null + created: 2026-02-16 08:07:49.158015+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc1859bd010>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:49.159788+00:00 + duration_ms: 142.136 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_off_the_light-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_off_the_light-1.yaml new file mode 100644 index 000000000..4c17f7ff6 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_off_the_light-1.yaml @@ -0,0 +1,294 @@ +--- +uuid: 1639d6e2-272d-4338-b478-bc8944b2498f +task_id: dom1_pl_lights-please_turn_off_the_light-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Please turn off the light + expect_changes: + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Please turn off the light + context: + id: 01KHJQVKHFGZDN8WJQJPK1CQM6 + parent_id: null + user_id: null + conversation_id: null + device_id: 2e6c9060d9a8390f30bd3c755ada7962 + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:49.551563+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room (floor Ground) and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:49.553393+00:00 + - role: user + content: Please turn off the light + attachments: null + created: 2026-02-16 08:07:49.551641+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dcafed0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:49.553407+00:00 + duration_ms: 141.859 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_off_the_light-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_off_the_light-2.yaml new file mode 100644 index 000000000..f0b524f31 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_off_the_light-2.yaml @@ -0,0 +1,294 @@ +--- +uuid: 81fd10f8-e055-4e5b-8e24-37e995db336d +task_id: dom1_pl_lights-please_turn_off_the_light-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Please turn off the light + expect_changes: + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Please turn off the light + context: + id: 01KHJQVKYW7J2RS4QJ57X7MYJ4 + parent_id: null + user_id: null + conversation_id: null + device_id: be2db57ec7b555ddbcbdd2ce06eef3bd + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:49.980978+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room (floor Ground) and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:49.982802+00:00 + - role: user + content: Please turn off the light + attachments: null + created: 2026-02-16 08:07:49.981054+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17daa5f30>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:49.982815+00:00 + duration_ms: 155.956 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_off_the_light-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_off_the_light-3.yaml new file mode 100644 index 000000000..7436f2a41 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_off_the_light-3.yaml @@ -0,0 +1,294 @@ +--- +uuid: e2efac9a-654b-4b60-9175-73815ff0b0af +task_id: dom1_pl_lights-please_turn_off_the_light-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Please turn off the light + expect_changes: + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Please turn off the light + context: + id: 01KHJQVMCDRV873QWCMPWXAYGR + parent_id: null + user_id: null + conversation_id: null + device_id: d02b2c9b75a2b4db1d2e10c65e1a7c2c + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:50.414071+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room (floor Ground) and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:50.416242+00:00 + - role: user + content: Please turn off the light + attachments: null + created: 2026-02-16 08:07:50.414148+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d12ec40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:50.416256+00:00 + duration_ms: 161.225 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_off_the_light-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_off_the_light-4.yaml new file mode 100644 index 000000000..06f4d2c17 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_off_the_light-4.yaml @@ -0,0 +1,294 @@ +--- +uuid: 95aaa7f1-b140-418a-912c-8bdc2bfe4547 +task_id: dom1_pl_lights-please_turn_off_the_light-4 +model_id: gemma-3-27b-it +category: light +task: + input_text: Please turn off the light + expect_changes: + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Please turn off the light + context: + id: 01KHJQVMTHHY2RC3VRRCDHYS73 + parent_id: null + user_id: null + conversation_id: null + device_id: 56165f131817cc87454494534e55715d + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:50.865843+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room (floor Ground) and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:50.868444+00:00 + - role: user + content: Please turn off the light + attachments: null + created: 2026-02-16 08:07:50.865919+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16cbc8250>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:50.868459+00:00 + duration_ms: 156.878 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_on_the_kitchen_light-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_on_the_kitchen_light-0.yaml new file mode 100644 index 000000000..bc95acfb0 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_on_the_kitchen_light-0.yaml @@ -0,0 +1,294 @@ +--- +uuid: bee613d8-1ff5-4c07-a028-b2118e7ac341 +task_id: dom1_pl_lights-please_turn_on_the_kitchen_light-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Please turn on the kitchen light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + state: 'on' + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Please turn on the kitchen light + context: + id: 01KHJQVETK8Y0Q5TSVG7GGTA2M + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:44.723238+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:44.725091+00:00 + - role: user + content: Please turn on the kitchen light + attachments: null + created: 2026-02-16 08:07:44.723329+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc185968ca0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:44.725111+00:00 + duration_ms: 154.124 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_on_the_kitchen_light-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_on_the_kitchen_light-1.yaml new file mode 100644 index 000000000..2c539fc31 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_on_the_kitchen_light-1.yaml @@ -0,0 +1,294 @@ +--- +uuid: 8bd75e02-9e98-4a7a-9c37-d21ca3ca794b +task_id: dom1_pl_lights-please_turn_on_the_kitchen_light-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Please turn on the kitchen light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + state: 'on' + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Please turn on the kitchen light + context: + id: 01KHJQVF8M3W5A9ZW6XMCJADND + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:45.172695+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:45.174553+00:00 + - role: user + content: Please turn on the kitchen light + attachments: null + created: 2026-02-16 08:07:45.172786+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17de56ae0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:45.174570+00:00 + duration_ms: 150.369 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_on_the_kitchen_light-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_on_the_kitchen_light-2.yaml new file mode 100644 index 000000000..aa9ae36e4 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_on_the_kitchen_light-2.yaml @@ -0,0 +1,294 @@ +--- +uuid: 435d9c3a-66d1-4921-9f21-63149691c625 +task_id: dom1_pl_lights-please_turn_on_the_kitchen_light-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Please turn on the kitchen light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + state: 'on' + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Please turn on the kitchen light + context: + id: 01KHJQVFN544ZWNQ7KTJWT3WCZ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:45.573702+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:45.575472+00:00 + - role: user + content: Please turn on the kitchen light + attachments: null + created: 2026-02-16 08:07:45.573776+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dd3e770>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:45.575485+00:00 + duration_ms: 145.918 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_on_the_kitchen_light-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_on_the_kitchen_light-3.yaml new file mode 100644 index 000000000..c3c2d1f4d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_on_the_kitchen_light-3.yaml @@ -0,0 +1,294 @@ +--- +uuid: 809b824b-0dd5-45fc-96a6-7c4d64b47338 +task_id: dom1_pl_lights-please_turn_on_the_kitchen_light-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Please turn on the kitchen light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + state: 'on' + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Please turn on the kitchen light + context: + id: 01KHJQVG1ZVMKY7V76VR19P6GB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:45.983281+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:45.985055+00:00 + - role: user + content: Please turn on the kitchen light + attachments: null + created: 2026-02-16 08:07:45.983357+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17db7e400>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:45.985067+00:00 + duration_ms: 144.628 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_on_the_kitchen_light-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_on_the_kitchen_light-4.yaml new file mode 100644 index 000000000..e369b7e96 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-please_turn_on_the_kitchen_light-4.yaml @@ -0,0 +1,294 @@ +--- +uuid: 8c866d2e-a6e1-4017-8715-22c41ea52082 +task_id: dom1_pl_lights-please_turn_on_the_kitchen_light-4 +model_id: gemma-3-27b-it +category: light +task: + input_text: Please turn on the kitchen light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.kitchen_light: + expected: + state: 'on' + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Please turn on the kitchen light + context: + id: 01KHJQVGF60RS05AX6WRAD7CNB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:46.406971+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:46.409729+00:00 + - role: user + content: Please turn on the kitchen light + attachments: null + created: 2026-02-16 08:07:46.407050+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d92cb40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:46.409745+00:00 + duration_ms: 156.913 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-shut_off_the_upstairs_lights_please-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-shut_off_the_upstairs_lights_please-0.yaml new file mode 100644 index 000000000..bf07e5a92 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-shut_off_the_upstairs_lights_please-0.yaml @@ -0,0 +1,310 @@ +--- +uuid: 715757ce-f992-4821-a391-44c0c2f99ac6 +task_id: dom1_pl_lights-shut_off_the_upstairs_lights_please-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Shut off the upstairs lights please + expect_changes: + light.bedroom_2_light: + state: 'off' + attributes: null + light.bedroom_3_light: + state: 'off' + attributes: null + light.bedroom_4_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_3_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_4_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Shut off the upstairs lights please + context: + id: 01KHJQVQE1R8A0JN652GJ2WKH0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:53.537119+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:53.538999+00:00 + - role: user + content: Shut off the upstairs lights please + attachments: null + created: 2026-02-16 08:07:53.537194+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d28cb40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:53.539012+00:00 + duration_ms: 64.254 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-shut_off_the_upstairs_lights_please-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-shut_off_the_upstairs_lights_please-1.yaml new file mode 100644 index 000000000..bfa55b143 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-shut_off_the_upstairs_lights_please-1.yaml @@ -0,0 +1,310 @@ +--- +uuid: 5612a189-2856-4a86-9d32-5203fb44786f +task_id: dom1_pl_lights-shut_off_the_upstairs_lights_please-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Shut off the upstairs lights please + expect_changes: + light.bedroom_2_light: + state: 'off' + attributes: null + light.bedroom_3_light: + state: 'off' + attributes: null + light.bedroom_4_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_3_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_4_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Shut off the upstairs lights please + context: + id: 01KHJQVQR8GSQF9W5MG1VV83N9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:53.864733+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:53.866573+00:00 + - role: user + content: Shut off the upstairs lights please + attachments: null + created: 2026-02-16 08:07:53.864811+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dfd54e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:53.866585+00:00 + duration_ms: 165.436 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-shut_off_the_upstairs_lights_please-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-shut_off_the_upstairs_lights_please-2.yaml new file mode 100644 index 000000000..e218749aa --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-shut_off_the_upstairs_lights_please-2.yaml @@ -0,0 +1,310 @@ +--- +uuid: 13136190-568e-49de-97e7-2f10614d0ed7 +task_id: dom1_pl_lights-shut_off_the_upstairs_lights_please-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Shut off the upstairs lights please + expect_changes: + light.bedroom_2_light: + state: 'off' + attributes: null + light.bedroom_3_light: + state: 'off' + attributes: null + light.bedroom_4_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_3_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_4_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Shut off the upstairs lights please + context: + id: 01KHJQVR54W0706ZM1VCBRPM2T + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:54.276147+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:54.277922+00:00 + - role: user + content: Shut off the upstairs lights please + attachments: null + created: 2026-02-16 08:07:54.276222+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c8cef00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:54.277934+00:00 + duration_ms: 62.368 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-shut_off_the_upstairs_lights_please-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-shut_off_the_upstairs_lights_please-3.yaml new file mode 100644 index 000000000..f41e63325 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-shut_off_the_upstairs_lights_please-3.yaml @@ -0,0 +1,310 @@ +--- +uuid: 9f78a3fa-7aa3-43d4-8c63-d44210ae8445 +task_id: dom1_pl_lights-shut_off_the_upstairs_lights_please-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Shut off the upstairs lights please + expect_changes: + light.bedroom_2_light: + state: 'off' + attributes: null + light.bedroom_3_light: + state: 'off' + attributes: null + light.bedroom_4_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_3_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_4_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Shut off the upstairs lights please + context: + id: 01KHJQVRFJG6GQE2YXVHX0EAGQ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:54.610597+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:54.612857+00:00 + - role: user + content: Shut off the upstairs lights please + attachments: null + created: 2026-02-16 08:07:54.610672+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dfa2da0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:54.612871+00:00 + duration_ms: 149.38 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-shut_off_the_upstairs_lights_please-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-shut_off_the_upstairs_lights_please-4.yaml new file mode 100644 index 000000000..7697fdf69 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-shut_off_the_upstairs_lights_please-4.yaml @@ -0,0 +1,310 @@ +--- +uuid: de07ca24-bf4a-43ce-b80a-1eb250b31e6c +task_id: dom1_pl_lights-shut_off_the_upstairs_lights_please-4 +model_id: gemma-3-27b-it +category: light +task: + input_text: Shut off the upstairs lights please + expect_changes: + light.bedroom_2_light: + state: 'off' + attributes: null + light.bedroom_3_light: + state: 'off' + attributes: null + light.bedroom_4_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_3_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_4_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Shut off the upstairs lights please + context: + id: 01KHJQVRWWCA4MEDVEE5KC0X0Y + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:55.036271+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:55.042274+00:00 + - role: user + content: Shut off the upstairs lights please + attachments: null + created: 2026-02-16 08:07:55.036354+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d0f3110>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:55.042291+00:00 + duration_ms: 64.02 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_light-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_light-0.yaml new file mode 100644 index 000000000..f7a733036 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_light-0.yaml @@ -0,0 +1,310 @@ +--- +uuid: bf79fb3b-5533-429e-9e11-3072153fbf65 +task_id: dom1_pl_lights-turn_off_the_light-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn off the light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: + brightness: 100 + light.living_room_light: + state: 'on' + attributes: + brightness: 100 + light.dining_room_light: + state: 'on' + attributes: + brightness: 100 + light.bedroom_1_light: + state: 'on' + attributes: null + light.bedroom_2_light: + state: 'on' + attributes: null + light.bedroom_3_light: + state: 'on' + attributes: null + light.bedroom_4_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Turn off the light + context: + id: 01KHJQVN8AE60WQP5JFATV849W + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:51.307075+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:51.308966+00:00 + - role: user + content: Turn off the light + attachments: null + created: 2026-02-16 08:07:51.307152+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc18596b3d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:51.308981+00:00 + duration_ms: 66.8 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_light-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_light-1.yaml new file mode 100644 index 000000000..3dd237556 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_light-1.yaml @@ -0,0 +1,310 @@ +--- +uuid: a68cf9fe-d44f-474e-8ff2-e65f27e53abc +task_id: dom1_pl_lights-turn_off_the_light-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn off the light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: + brightness: 100 + light.living_room_light: + state: 'on' + attributes: + brightness: 100 + light.dining_room_light: + state: 'on' + attributes: + brightness: 100 + light.bedroom_1_light: + state: 'on' + attributes: null + light.bedroom_2_light: + state: 'on' + attributes: null + light.bedroom_3_light: + state: 'on' + attributes: null + light.bedroom_4_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Turn off the light + context: + id: 01KHJQVNKC3HR1EHJSB8J5BB7S + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:51.661015+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:51.662928+00:00 + - role: user + content: Turn off the light + attachments: null + created: 2026-02-16 08:07:51.661093+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d2b8ca0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:51.662941+00:00 + duration_ms: 143.626 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_light-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_light-2.yaml new file mode 100644 index 000000000..e092071b9 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_light-2.yaml @@ -0,0 +1,310 @@ +--- +uuid: 8ebb7365-4261-409e-8574-71086e301bcc +task_id: dom1_pl_lights-turn_off_the_light-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn off the light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: + brightness: 100 + light.living_room_light: + state: 'on' + attributes: + brightness: 100 + light.dining_room_light: + state: 'on' + attributes: + brightness: 100 + light.bedroom_1_light: + state: 'on' + attributes: null + light.bedroom_2_light: + state: 'on' + attributes: null + light.bedroom_3_light: + state: 'on' + attributes: null + light.bedroom_4_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Turn off the light + context: + id: 01KHJQVP1672Y7T33GA71VVN4C + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:52.103042+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:52.104842+00:00 + - role: user + content: Turn off the light + attachments: null + created: 2026-02-16 08:07:52.103116+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d0c81a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:52.104855+00:00 + duration_ms: 63.245 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_light-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_light-3.yaml new file mode 100644 index 000000000..f868bb5f2 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_light-3.yaml @@ -0,0 +1,310 @@ +--- +uuid: 6dc50989-95ab-4eb9-b392-6a8112add909 +task_id: dom1_pl_lights-turn_off_the_light-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn off the light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: + brightness: 100 + light.living_room_light: + state: 'on' + attributes: + brightness: 100 + light.dining_room_light: + state: 'on' + attributes: + brightness: 100 + light.bedroom_1_light: + state: 'on' + attributes: null + light.bedroom_2_light: + state: 'on' + attributes: null + light.bedroom_3_light: + state: 'on' + attributes: null + light.bedroom_4_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Turn off the light + context: + id: 01KHJQVPCEH3JBDAZXPYN6T2B1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:52.463087+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:52.465433+00:00 + - role: user + content: Turn off the light + attachments: null + created: 2026-02-16 08:07:52.463163+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17de6e1f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:52.465446+00:00 + duration_ms: 66.849 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_light-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_light-4.yaml new file mode 100644 index 000000000..312a52040 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_light-4.yaml @@ -0,0 +1,310 @@ +--- +uuid: c107ce1a-321a-4da5-8893-d19a7b03532d +task_id: dom1_pl_lights-turn_off_the_light-4 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn off the light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: + brightness: 100 + light.living_room_light: + state: 'on' + attributes: + brightness: 100 + light.dining_room_light: + state: 'on' + attributes: + brightness: 100 + light.bedroom_1_light: + state: 'on' + attributes: null + light.bedroom_2_light: + state: 'on' + attributes: null + light.bedroom_3_light: + state: 'on' + attributes: null + light.bedroom_4_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Turn off the light + context: + id: 01KHJQVPW4E783GHB87FCYSK47 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:52.964585+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:52.970375+00:00 + - role: user + content: Turn off the light + attachments: null + created: 2026-02-16 08:07:52.964660+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc1859be560>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:52.970392+00:00 + duration_ms: 65.988 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_lights_upstairs-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_lights_upstairs-0.yaml new file mode 100644 index 000000000..1c164dfd3 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_lights_upstairs-0.yaml @@ -0,0 +1,310 @@ +--- +uuid: cf55c123-74ad-4f5b-97f9-92d927daaf90 +task_id: dom1_pl_lights-turn_off_the_lights_upstairs-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn off the lights upstairs + expect_changes: + light.bedroom_2_light: + state: 'off' + attributes: null + light.bedroom_3_light: + state: 'off' + attributes: null + light.bedroom_4_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_3_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_4_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the lights upstairs + context: + id: 01KHJQVS6Z75AWAFYJJ7369RB8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:55.359805+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:55.361652+00:00 + - role: user + content: Turn off the lights upstairs + attachments: null + created: 2026-02-16 08:07:55.359881+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d5a2da0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:55.361667+00:00 + duration_ms: 61.593 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_lights_upstairs-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_lights_upstairs-1.yaml new file mode 100644 index 000000000..639ef4bb3 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_lights_upstairs-1.yaml @@ -0,0 +1,310 @@ +--- +uuid: 48f18936-0413-4c08-9900-dd3529eca22d +task_id: dom1_pl_lights-turn_off_the_lights_upstairs-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn off the lights upstairs + expect_changes: + light.bedroom_2_light: + state: 'off' + attributes: null + light.bedroom_3_light: + state: 'off' + attributes: null + light.bedroom_4_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_3_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_4_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the lights upstairs + context: + id: 01KHJQVSHXW525NZ181AVYM75Q + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:55.709824+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:55.711686+00:00 + - role: user + content: Turn off the lights upstairs + attachments: null + created: 2026-02-16 08:07:55.709904+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc18596b7f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:55.711700+00:00 + duration_ms: 62.159 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_lights_upstairs-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_lights_upstairs-2.yaml new file mode 100644 index 000000000..a9239068d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_lights_upstairs-2.yaml @@ -0,0 +1,310 @@ +--- +uuid: ce5af5a8-df7f-4531-b3d7-001db8b0a8b2 +task_id: dom1_pl_lights-turn_off_the_lights_upstairs-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn off the lights upstairs + expect_changes: + light.bedroom_2_light: + state: 'off' + attributes: null + light.bedroom_3_light: + state: 'off' + attributes: null + light.bedroom_4_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_3_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_4_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the lights upstairs + context: + id: 01KHJQVSWF4TR7ZDM06C0EMJ3S + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:56.047997+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:56.049794+00:00 + - role: user + content: Turn off the lights upstairs + attachments: null + created: 2026-02-16 08:07:56.048073+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d8be610>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:56.049807+00:00 + duration_ms: 65.538 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_lights_upstairs-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_lights_upstairs-3.yaml new file mode 100644 index 000000000..163aa9995 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_lights_upstairs-3.yaml @@ -0,0 +1,310 @@ +--- +uuid: 19f0f20f-379b-4311-bd16-98107fc8681f +task_id: dom1_pl_lights-turn_off_the_lights_upstairs-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn off the lights upstairs + expect_changes: + light.bedroom_2_light: + state: 'off' + attributes: null + light.bedroom_3_light: + state: 'off' + attributes: null + light.bedroom_4_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_3_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_4_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the lights upstairs + context: + id: 01KHJQVT6RX6RZ7EEEWJJFMXEV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:56.376428+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:56.378275+00:00 + - role: user + content: Turn off the lights upstairs + attachments: null + created: 2026-02-16 08:07:56.376507+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c7d0460>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:56.378290+00:00 + duration_ms: 144.111 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_lights_upstairs-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_lights_upstairs-4.yaml new file mode 100644 index 000000000..6c74c1f57 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_lights_upstairs-4.yaml @@ -0,0 +1,310 @@ +--- +uuid: 5d25b6e7-3b3e-43b1-9b42-50ffd164c31f +task_id: dom1_pl_lights-turn_off_the_lights_upstairs-4 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn off the lights upstairs + expect_changes: + light.bedroom_2_light: + state: 'off' + attributes: null + light.bedroom_3_light: + state: 'off' + attributes: null + light.bedroom_4_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_3_light: + expected: + state: 'off' + got: + state: 'on' + light.bedroom_4_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the lights upstairs + context: + id: 01KHJQVTKYP93XBG8BJM31DFZT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:56.798307+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:56.800103+00:00 + - role: user + content: Turn off the lights upstairs + attachments: null + created: 2026-02-16 08:07:56.798383+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c9e2fb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:56.800116+00:00 + duration_ms: 148.089 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-0.yaml new file mode 100644 index 000000000..60ae0d668 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-0.yaml @@ -0,0 +1,297 @@ +--- +uuid: 9bfc5f56-944a-47d2-a0af-be932ee204ad +task_id: dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn off the living room light then turn on the kitchen light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the living room light then turn on the kitchen light + context: + id: 01KHJQW2YPZSXSDH66MY56TQJD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:05.335077+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:05.337618+00:00 + - role: user + content: Turn off the living room light then turn on the kitchen light + attachments: null + created: 2026-02-16 08:08:05.335153+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d9f6cf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:05.337632+00:00 + duration_ms: 67.828 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-1.yaml new file mode 100644 index 000000000..457e7e4de --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-1.yaml @@ -0,0 +1,297 @@ +--- +uuid: 415182c1-eb5b-4c83-a3ac-580299675f25 +task_id: dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn off the living room light then turn on the kitchen light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the living room light then turn on the kitchen light + context: + id: 01KHJQW398BVKK1M48TD5W3SSS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:05.672404+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:05.676098+00:00 + - role: user + content: Turn off the living room light then turn on the kitchen light + attachments: null + created: 2026-02-16 08:08:05.672509+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c8e1b10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:05.676114+00:00 + duration_ms: 74.597 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-2.yaml new file mode 100644 index 000000000..54f59ccc7 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-2.yaml @@ -0,0 +1,297 @@ +--- +uuid: 33d221d3-e9d9-4fdd-beef-9ecbe499d75f +task_id: dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn off the living room light then turn on the kitchen light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the living room light then turn on the kitchen light + context: + id: 01KHJQW3NC1MW79967278C55QB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:06.060611+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:06.087679+00:00 + - role: user + content: Turn off the living room light then turn on the kitchen light + attachments: null + created: 2026-02-16 08:08:06.060691+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d6ae6c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:06.087700+00:00 + duration_ms: 110.493 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-3.yaml new file mode 100644 index 000000000..2d2aedf9d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-3.yaml @@ -0,0 +1,297 @@ +--- +uuid: 8e8c726e-2fb7-4dc9-a3f9-8d383be12770 +task_id: dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn off the living room light then turn on the kitchen light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the living room light then turn on the kitchen light + context: + id: 01KHJQW40GR46PKH37XC8BJZWJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:06.416657+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:06.418536+00:00 + - role: user + content: Turn off the living room light then turn on the kitchen light + attachments: null + created: 2026-02-16 08:08:06.416733+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17db7f690>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:06.418551+00:00 + duration_ms: 60.904 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-4.yaml new file mode 100644 index 000000000..c08fbcf14 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-4.yaml @@ -0,0 +1,297 @@ +--- +uuid: ba459c07-309b-4895-8807-ab2e215afac9 +task_id: dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-4 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn off the living room light then turn on the kitchen light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn off the living room light then turn on the kitchen light + context: + id: 01KHJQW49PKM3JP98D1X7C9X2H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:06.710892+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:06.712716+00:00 + - role: user + content: Turn off the living room light then turn on the kitchen light + attachments: null + created: 2026-02-16 08:08:06.710966+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d2009e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:06.712731+00:00 + duration_ms: 57.591 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-0.yaml new file mode 100644 index 000000000..355d44041 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-0.yaml @@ -0,0 +1,310 @@ +--- +uuid: bd79f5eb-f2ab-4914-815f-865cc44123bd +task_id: dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on all the bedroom lights upstairs + expect_changes: + light.bedroom_2_light: + state: 'on' + attributes: null + light.bedroom_3_light: + state: 'on' + attributes: null + light.bedroom_4_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_3_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_4_light: + expected: + state: 'on' + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on all the bedroom lights upstairs + context: + id: 01KHJQVWZ3ZAWHS6MGN5SA6TER + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:59.203364+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:59.205271+00:00 + - role: user + content: Turn on all the bedroom lights upstairs + attachments: null + created: 2026-02-16 08:07:59.203444+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d00ee50>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:59.205284+00:00 + duration_ms: 65.9 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-1.yaml new file mode 100644 index 000000000..bfa29ec59 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-1.yaml @@ -0,0 +1,310 @@ +--- +uuid: 32246684-1404-45f5-b1a8-39ba10374a14 +task_id: dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on all the bedroom lights upstairs + expect_changes: + light.bedroom_2_light: + state: 'on' + attributes: null + light.bedroom_3_light: + state: 'on' + attributes: null + light.bedroom_4_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_3_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_4_light: + expected: + state: 'on' + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on all the bedroom lights upstairs + context: + id: 01KHJQVXA2CX7HAH1T36V4HDHD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:59.554551+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:59.556385+00:00 + - role: user + content: Turn on all the bedroom lights upstairs + attachments: null + created: 2026-02-16 08:07:59.554631+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d94dc70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:59.556397+00:00 + duration_ms: 72.465 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-2.yaml new file mode 100644 index 000000000..243bbb62a --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-2.yaml @@ -0,0 +1,310 @@ +--- +uuid: 9993ec35-b378-47d2-835e-7b06571fba5b +task_id: dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on all the bedroom lights upstairs + expect_changes: + light.bedroom_2_light: + state: 'on' + attributes: null + light.bedroom_3_light: + state: 'on' + attributes: null + light.bedroom_4_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_3_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_4_light: + expected: + state: 'on' + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on all the bedroom lights upstairs + context: + id: 01KHJQVXN06X37E4BWEPZT6V56 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:59.904490+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:59.910854+00:00 + - role: user + content: Turn on all the bedroom lights upstairs + attachments: null + created: 2026-02-16 08:07:59.904567+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dbf6da0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:59.910871+00:00 + duration_ms: 69.127 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-3.yaml new file mode 100644 index 000000000..e2029ec71 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-3.yaml @@ -0,0 +1,310 @@ +--- +uuid: 58d3b0be-e2ae-49fa-8e77-ce8a07355ff6 +task_id: dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on all the bedroom lights upstairs + expect_changes: + light.bedroom_2_light: + state: 'on' + attributes: null + light.bedroom_3_light: + state: 'on' + attributes: null + light.bedroom_4_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_3_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_4_light: + expected: + state: 'on' + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on all the bedroom lights upstairs + context: + id: 01KHJQVXZC529YDKB12KZE93BV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:00.236322+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:00.238136+00:00 + - role: user + content: Turn on all the bedroom lights upstairs + attachments: null + created: 2026-02-16 08:08:00.236397+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc1846071c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:00.238149+00:00 + duration_ms: 63.801 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-4.yaml new file mode 100644 index 000000000..6c3c3e069 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-4.yaml @@ -0,0 +1,310 @@ +--- +uuid: f99f7823-80cd-49c6-81c2-b3659db37fa2 +task_id: dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-4 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on all the bedroom lights upstairs + expect_changes: + light.bedroom_2_light: + state: 'on' + attributes: null + light.bedroom_3_light: + state: 'on' + attributes: null + light.bedroom_4_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_3_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_4_light: + expected: + state: 'on' + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on all the bedroom lights upstairs + context: + id: 01KHJQVY9CKP643M0T7P49QZSN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:00.556856+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:00.560029+00:00 + - role: user + content: Turn on all the bedroom lights upstairs + attachments: null + created: 2026-02-16 08:08:00.556929+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dd79220>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:00.560042+00:00 + duration_ms: 167.205 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_upstairs_lights-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_upstairs_lights-0.yaml new file mode 100644 index 000000000..89117c5db --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_upstairs_lights-0.yaml @@ -0,0 +1,310 @@ +--- +uuid: 004d54a7-0327-4e30-8199-21a862314631 +task_id: dom1_pl_lights-turn_on_all_the_upstairs_lights-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on all the upstairs lights + expect_changes: + light.bedroom_2_light: + state: 'on' + attributes: null + light.bedroom_3_light: + state: 'on' + attributes: null + light.bedroom_4_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_3_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_4_light: + expected: + state: 'on' + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on all the upstairs lights + context: + id: 01KHJQVV1HTFH04HVMPMAK03MF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:57.234008+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:57.237317+00:00 + - role: user + content: Turn on all the upstairs lights + attachments: null + created: 2026-02-16 08:07:57.234083+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc1859bf530>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:57.237333+00:00 + duration_ms: 66.703 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_upstairs_lights-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_upstairs_lights-1.yaml new file mode 100644 index 000000000..3308f63c8 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_upstairs_lights-1.yaml @@ -0,0 +1,310 @@ +--- +uuid: bc011066-34ba-4f7f-95d7-7dec2249e17d +task_id: dom1_pl_lights-turn_on_all_the_upstairs_lights-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on all the upstairs lights + expect_changes: + light.bedroom_2_light: + state: 'on' + attributes: null + light.bedroom_3_light: + state: 'on' + attributes: null + light.bedroom_4_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_3_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_4_light: + expected: + state: 'on' + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on all the upstairs lights + context: + id: 01KHJQVVDBS63W2RPY6X5RXQ69 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:57.611781+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:57.613667+00:00 + - role: user + content: Turn on all the upstairs lights + attachments: null + created: 2026-02-16 08:07:57.611861+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d7f3060>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:57.613681+00:00 + duration_ms: 61.995 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_upstairs_lights-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_upstairs_lights-2.yaml new file mode 100644 index 000000000..3abbe9671 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_upstairs_lights-2.yaml @@ -0,0 +1,310 @@ +--- +uuid: 6b0158c4-d9e4-4758-b305-2257d3d15083 +task_id: dom1_pl_lights-turn_on_all_the_upstairs_lights-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on all the upstairs lights + expect_changes: + light.bedroom_2_light: + state: 'on' + attributes: null + light.bedroom_3_light: + state: 'on' + attributes: null + light.bedroom_4_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_3_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_4_light: + expected: + state: 'on' + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on all the upstairs lights + context: + id: 01KHJQVVRZRXHFW0CB7QZ8TG1K + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:57.983131+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:57.984987+00:00 + - role: user + content: Turn on all the upstairs lights + attachments: null + created: 2026-02-16 08:07:57.983209+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c898eb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:57.985003+00:00 + duration_ms: 183.73 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_upstairs_lights-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_upstairs_lights-3.yaml new file mode 100644 index 000000000..03e54311b --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_upstairs_lights-3.yaml @@ -0,0 +1,310 @@ +--- +uuid: 878cad41-33c5-4f87-ba01-750b9fda7c26 +task_id: dom1_pl_lights-turn_on_all_the_upstairs_lights-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on all the upstairs lights + expect_changes: + light.bedroom_2_light: + state: 'on' + attributes: null + light.bedroom_3_light: + state: 'on' + attributes: null + light.bedroom_4_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_3_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_4_light: + expected: + state: 'on' + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on all the upstairs lights + context: + id: 01KHJQVW6VYX8471HE1YT5NXWK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:58.427109+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:58.429000+00:00 + - role: user + content: Turn on all the upstairs lights + attachments: null + created: 2026-02-16 08:07:58.427187+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17de55dd0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:58.429014+00:00 + duration_ms: 144.905 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_upstairs_lights-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_upstairs_lights-4.yaml new file mode 100644 index 000000000..d644f2b3c --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_all_the_upstairs_lights-4.yaml @@ -0,0 +1,310 @@ +--- +uuid: 58dfcc7e-d32d-4fce-a659-fcf9e5bea376 +task_id: dom1_pl_lights-turn_on_all_the_upstairs_lights-4 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on all the upstairs lights + expect_changes: + light.bedroom_2_light: + state: 'on' + attributes: null + light.bedroom_3_light: + state: 'on' + attributes: null + light.bedroom_4_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.bedroom_2_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_3_light: + expected: + state: 'on' + got: + state: 'off' + light.bedroom_4_light: + expected: + state: 'on' + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on all the upstairs lights + context: + id: 01KHJQVWKEWTHXKXWBV5YYNQGP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:58.830861+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:07:58.832708+00:00 + - role: user + content: Turn on all the upstairs lights + attachments: null + created: 2026-02-16 08:07:58.830935+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d5180f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:58.832721+00:00 + duration_ms: 64.249 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-0.yaml new file mode 100644 index 000000000..100896f24 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-0.yaml @@ -0,0 +1,292 @@ +--- +uuid: 19e2317c-edb8-4826-91e9-98977e2a15b8 +task_id: dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on both the kitchen light and living room lights + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Turn on both the kitchen light and living room lights + context: + id: 01KHJQW4M2P2DSK5GC6DPFERWH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:07.043082+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:07.044997+00:00 + - role: user + content: Turn on both the kitchen light and living room lights + attachments: null + created: 2026-02-16 08:08:07.043158+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d82c250>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:07.045010+00:00 + duration_ms: 63.776 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-1.yaml new file mode 100644 index 000000000..a4e5279c4 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-1.yaml @@ -0,0 +1,292 @@ +--- +uuid: d1129fd9-f737-4e45-8fd1-7aeff6389512 +task_id: dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on both the kitchen light and living room lights + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Turn on both the kitchen light and living room lights + context: + id: 01KHJQW4YJJAS7PW5N5DVG0WAB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:07.378277+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:07.380191+00:00 + - role: user + content: Turn on both the kitchen light and living room lights + attachments: null + created: 2026-02-16 08:08:07.378358+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d105f30>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:07.380204+00:00 + duration_ms: 61.235 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-2.yaml new file mode 100644 index 000000000..e6cbb5b73 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-2.yaml @@ -0,0 +1,292 @@ +--- +uuid: a73eb935-f4de-4577-9ca7-f40a54833199 +task_id: dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on both the kitchen light and living room lights + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Turn on both the kitchen light and living room lights + context: + id: 01KHJQW593J0PK0JTXK8640R5F + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:07.715457+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:07.717222+00:00 + - role: user + content: Turn on both the kitchen light and living room lights + attachments: null + created: 2026-02-16 08:08:07.715536+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d28c880>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:07.717248+00:00 + duration_ms: 64.158 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-3.yaml new file mode 100644 index 000000000..43bbdd8ba --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-3.yaml @@ -0,0 +1,292 @@ +--- +uuid: a3a21bf8-9617-4d51-a14a-515f4030607f +task_id: dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on both the kitchen light and living room lights + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Turn on both the kitchen light and living room lights + context: + id: 01KHJQW5MARE932ZZR3ABDSQTE + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:08.074818+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:08.086532+00:00 + - role: user + content: Turn on both the kitchen light and living room lights + attachments: null + created: 2026-02-16 08:08:08.074894+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17de05dd0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:08.086551+00:00 + duration_ms: 67.332 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-4.yaml new file mode 100644 index 000000000..0b94c1101 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-4.yaml @@ -0,0 +1,292 @@ +--- +uuid: 2f269c9a-d05d-4e10-8073-053e1a0e4a37 +task_id: dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-4 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on both the kitchen light and living room lights + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Turn on both the kitchen light and living room lights + context: + id: 01KHJQW5Y91R5GNWD7MY5600DT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:08.393334+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:08.395254+00:00 + - role: user + content: Turn on both the kitchen light and living room lights + attachments: null + created: 2026-02-16 08:08:08.393413+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17db05e80>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:08.395268+00:00 + duration_ms: 61.504 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-0.yaml new file mode 100644 index 000000000..e70bf33fc --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-0.yaml @@ -0,0 +1,297 @@ +--- +uuid: b732916b-39a6-43a7-aba5-5e16598589cb +task_id: dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the kitchen light and then turn off the living room light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn on the kitchen light and then turn off the living room light + context: + id: 01KHJQVYQ73NS547KKWM8WJ4AX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:00.999498+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:01.001367+00:00 + - role: user + content: Turn on the kitchen light and then turn off the living room light + attachments: null + created: 2026-02-16 08:08:00.999575+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d8496f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:01.001381+00:00 + duration_ms: 143.207 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-1.yaml new file mode 100644 index 000000000..f16e14fc0 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-1.yaml @@ -0,0 +1,297 @@ +--- +uuid: d45ece5a-fc55-4f16-be23-fad7a9ee5531 +task_id: dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the kitchen light and then turn off the living room light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn on the kitchen light and then turn off the living room light + context: + id: 01KHJQVZ3QX494MYKGDYYWMV77 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:01.399127+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:01.401937+00:00 + - role: user + content: Turn on the kitchen light and then turn off the living room light + attachments: null + created: 2026-02-16 08:08:01.399207+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d066da0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:01.401952+00:00 + duration_ms: 65.68 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-2.yaml new file mode 100644 index 000000000..e599bde62 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-2.yaml @@ -0,0 +1,297 @@ +--- +uuid: 3a1df2d7-a18a-452d-9168-23099c5c9a6f +task_id: dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the kitchen light and then turn off the living room light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn on the kitchen light and then turn off the living room light + context: + id: 01KHJQVZECHXQDKHYTVJ90V67K + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:01.740740+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:01.742691+00:00 + - role: user + content: Turn on the kitchen light and then turn off the living room light + attachments: null + created: 2026-02-16 08:08:01.740818+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c8b2cf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:01.742705+00:00 + duration_ms: 149.198 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-3.yaml new file mode 100644 index 000000000..d6a212dd8 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-3.yaml @@ -0,0 +1,297 @@ +--- +uuid: e60e3e2f-a145-4189-99de-4c310f0fa770 +task_id: dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the kitchen light and then turn off the living room light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn on the kitchen light and then turn off the living room light + context: + id: 01KHJQW0EP9NC8E4HN43HV80PX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:02.775055+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:02.778739+00:00 + - role: user + content: Turn on the kitchen light and then turn off the living room light + attachments: null + created: 2026-02-16 08:08:02.775130+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d50da60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:02.778754+00:00 + duration_ms: 66.151 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-4.yaml new file mode 100644 index 000000000..51d367056 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-4.yaml @@ -0,0 +1,297 @@ +--- +uuid: 2c2f35da-7b6e-4326-b96f-5e50f168b886 +task_id: dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-4 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the kitchen light and then turn off the living room light + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn on the kitchen light and then turn off the living room light + context: + id: 01KHJQW0T8V86PHWHB9NB7YE4N + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:03.144610+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:03.146493+00:00 + - role: user + content: Turn on the kitchen light and then turn off the living room light + attachments: null + created: 2026-02-16 08:08:03.144686+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c8987d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:03.146506+00:00 + duration_ms: 59.695 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-0.yaml new file mode 100644 index 000000000..c24567809 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-0.yaml @@ -0,0 +1,300 @@ +--- +uuid: 0da222ee-903e-4de6-931e-f8e9af0c6b2e +task_id: dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the light in the kitchen, then turn off the light in the living + room + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn on the light in the kitchen, then turn off the light in the living + room + context: + id: 01KHJQW14RXBJYG7AR10ZVP26K + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:03.480471+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:03.482967+00:00 + - role: user + content: Turn on the light in the kitchen, then turn off the light in the + living room + attachments: null + created: 2026-02-16 08:08:03.480549+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d5f6090>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:03.482980+00:00 + duration_ms: 67.116 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-1.yaml new file mode 100644 index 000000000..f17d87dc8 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-1.yaml @@ -0,0 +1,300 @@ +--- +uuid: 9a78489f-f7c6-463b-9a28-613b0830d677 +task_id: dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the light in the kitchen, then turn off the light in the living + room + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn on the light in the kitchen, then turn off the light in the living + room + context: + id: 01KHJQW1FJPRGZ7P297KTG15C7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:03.826281+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:03.828005+00:00 + - role: user + content: Turn on the light in the kitchen, then turn off the light in the + living room + attachments: null + created: 2026-02-16 08:08:03.826362+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c7a94e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:03.828018+00:00 + duration_ms: 61.861 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-2.yaml new file mode 100644 index 000000000..67280babf --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-2.yaml @@ -0,0 +1,300 @@ +--- +uuid: a80abfa4-49d9-4554-8019-486455e0f11a +task_id: dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the light in the kitchen, then turn off the light in the living + room + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn on the light in the kitchen, then turn off the light in the living + room + context: + id: 01KHJQW1SSPAK57P6DJCVCMC22 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:04.153714+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:04.158516+00:00 + - role: user + content: Turn on the light in the kitchen, then turn off the light in the + living room + attachments: null + created: 2026-02-16 08:08:04.153789+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d6ac5c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:04.158531+00:00 + duration_ms: 148.07 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-3.yaml new file mode 100644 index 000000000..b5a350fdd --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-3.yaml @@ -0,0 +1,300 @@ +--- +uuid: a8d5a570-bfc5-48c3-874c-172164ba4db6 +task_id: dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the light in the kitchen, then turn off the light in the living + room + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn on the light in the kitchen, then turn off the light in the living + room + context: + id: 01KHJQW274TQARMV7YZXWRS14D + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:04.580198+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:04.582069+00:00 + - role: user + content: Turn on the light in the kitchen, then turn off the light in the + living room + attachments: null + created: 2026-02-16 08:08:04.580302+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dbf7ab0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:04.582081+00:00 + duration_ms: 57.579 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-4.yaml new file mode 100644 index 000000000..bc94def13 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-4.yaml @@ -0,0 +1,300 @@ +--- +uuid: 6c32972d-8382-47cc-aebc-340d1cd20069 +task_id: dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-4 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the light in the kitchen, then turn off the light in the living + room + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + light.living_room_light: + expected: + state: 'off' + got: + state: 'on' + conversation_trace: + - event_type: async_process + data: + text: Turn on the light in the kitchen, then turn off the light in the living + room + context: + id: 01KHJQW2H5TA6SDSC6WZ522Q5K + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:04.902089+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:04.903931+00:00 + - role: user + content: Turn on the light in the kitchen, then turn off the light in the + living room + attachments: null + created: 2026-02-16 08:08:04.902166+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17da7afb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:04.903943+00:00 + duration_ms: 144.859 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-0.yaml new file mode 100644 index 000000000..cc623dfd5 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-0.yaml @@ -0,0 +1,292 @@ +--- +uuid: 3bace1d0-91f3-43bc-8379-f0b21c6f686b +task_id: dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the living room and kitchen lights + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Turn on the living room and kitchen lights + context: + id: 01KHJQW683ZTBWDZBVD88XW9H2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:08.707550+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:08.709340+00:00 + - role: user + content: Turn on the living room and kitchen lights + attachments: null + created: 2026-02-16 08:08:08.707628+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ff82a30>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:08.709353+00:00 + duration_ms: 60.124 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-1.yaml new file mode 100644 index 000000000..eb2906785 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-1.yaml @@ -0,0 +1,292 @@ +--- +uuid: 40b98662-71b1-4a45-aa6f-45a8437ff921 +task_id: dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the living room and kitchen lights + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Turn on the living room and kitchen lights + context: + id: 01KHJQW6JZY2BSM0ENKH0E4120 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:09.055888+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:09.061222+00:00 + - role: user + content: Turn on the living room and kitchen lights + attachments: null + created: 2026-02-16 08:08:09.055964+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d976ae0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:09.061252+00:00 + duration_ms: 69.73 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-2.yaml new file mode 100644 index 000000000..1f6d3e8f8 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-2.yaml @@ -0,0 +1,292 @@ +--- +uuid: 9ea6c45f-82e5-4b53-a48e-9962f99de9a8 +task_id: dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the living room and kitchen lights + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Turn on the living room and kitchen lights + context: + id: 01KHJQW6XJTF71MC06980ATRN8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:09.394547+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:09.399788+00:00 + - role: user + content: Turn on the living room and kitchen lights + attachments: null + created: 2026-02-16 08:08:09.394623+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17ddafc10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:09.399805+00:00 + duration_ms: 174.557 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-3.yaml new file mode 100644 index 000000000..f9badd1ae --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-3.yaml @@ -0,0 +1,292 @@ +--- +uuid: 155967b9-ca63-45cc-adad-24c94325b7f4 +task_id: dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the living room and kitchen lights + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Turn on the living room and kitchen lights + context: + id: 01KHJQW7C53YQCK1JTG6GFNE8W + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:09.861117+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:09.863004+00:00 + - role: user + content: Turn on the living room and kitchen lights + attachments: null + created: 2026-02-16 08:08:09.861192+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d06ce00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:09.863018+00:00 + duration_ms: 62.337 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-4.yaml new file mode 100644 index 000000000..2b8e2d42e --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-4.yaml @@ -0,0 +1,292 @@ +--- +uuid: db3eed24-9c94-422a-9dcd-352a2fccaf3d +task_id: dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-4 +model_id: gemma-3-27b-it +category: light +task: + input_text: Turn on the living room and kitchen lights + expect_changes: + light.kitchen_light: + state: 'on' + attributes: null + light.living_room_light: + state: 'on' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Turn on the living room and kitchen lights + context: + id: 01KHJQW7PFXZAXJAHK3Q048VNX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:10.191162+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:10.195038+00:00 + - role: user + content: Turn on the living room and kitchen lights + attachments: null + created: 2026-02-16 08:08:10.191287+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d1517a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:10.195055+00:00 + duration_ms: 150.96 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_apples_to_my_trader_joe_s_list-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_apples_to_my_trader_joe_s_list-0.yaml new file mode 100644 index 000000000..472071cb6 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_apples_to_my_trader_joe_s_list-0.yaml @@ -0,0 +1,294 @@ +--- +uuid: 19e52645-a0df-45bf-86ff-02896d216f78 +task_id: dom1_pl_todo-add_apples_to_my_trader_joe_s_list-0 +model_id: gemma-3-27b-it +category: todo +task: + input_text: add apples to my trader joe's list + expect_changes: + todo.trader_joe_s: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.trader_joe_s: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: add apples to my trader joe's list + context: + id: 01KHJQW84R65E53DJB0P51Q1MB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:10.648867+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:10.650794+00:00 + - role: user + content: add apples to my trader joe's list + attachments: null + created: 2026-02-16 08:08:10.648946+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16cbb17a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:10.650808+00:00 + duration_ms: 58.296 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_apples_to_my_trader_joe_s_list-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_apples_to_my_trader_joe_s_list-1.yaml new file mode 100644 index 000000000..1d2c07afd --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_apples_to_my_trader_joe_s_list-1.yaml @@ -0,0 +1,294 @@ +--- +uuid: 3ec15759-22a9-494a-bf0b-76320bd405a9 +task_id: dom1_pl_todo-add_apples_to_my_trader_joe_s_list-1 +model_id: gemma-3-27b-it +category: todo +task: + input_text: add apples to my trader joe's list + expect_changes: + todo.trader_joe_s: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.trader_joe_s: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: add apples to my trader joe's list + context: + id: 01KHJQW8EE7F2MTSS8WY184VY7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:10.958418+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:10.960240+00:00 + - role: user + content: add apples to my trader joe's list + attachments: null + created: 2026-02-16 08:08:10.958495+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d5b4b40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:10.960254+00:00 + duration_ms: 61.469 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_apples_to_my_trader_joe_s_list-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_apples_to_my_trader_joe_s_list-2.yaml new file mode 100644 index 000000000..dc0edcc98 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_apples_to_my_trader_joe_s_list-2.yaml @@ -0,0 +1,294 @@ +--- +uuid: b0b3b485-1e9b-48f7-a8dd-3fef2ca76390 +task_id: dom1_pl_todo-add_apples_to_my_trader_joe_s_list-2 +model_id: gemma-3-27b-it +category: todo +task: + input_text: add apples to my trader joe's list + expect_changes: + todo.trader_joe_s: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.trader_joe_s: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: add apples to my trader joe's list + context: + id: 01KHJQW8SA8ZWXZA927DJXSVE4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:11.306722+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:11.309624+00:00 + - role: user + content: add apples to my trader joe's list + attachments: null + created: 2026-02-16 08:08:11.306825+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d2ea8d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:11.309642+00:00 + duration_ms: 169.733 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_apples_to_my_trader_joe_s_list-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_apples_to_my_trader_joe_s_list-3.yaml new file mode 100644 index 000000000..d2dc6da55 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_apples_to_my_trader_joe_s_list-3.yaml @@ -0,0 +1,294 @@ +--- +uuid: 73b52b7b-843a-4ac0-9288-a2a75b572050 +task_id: dom1_pl_todo-add_apples_to_my_trader_joe_s_list-3 +model_id: gemma-3-27b-it +category: todo +task: + input_text: add apples to my trader joe's list + expect_changes: + todo.trader_joe_s: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.trader_joe_s: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: add apples to my trader joe's list + context: + id: 01KHJQW96F3ZVPJ8V8QS83NF85 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:11.727247+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:11.730381+00:00 + - role: user + content: add apples to my trader joe's list + attachments: null + created: 2026-02-16 08:08:11.727352+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17ddec720>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:11.730395+00:00 + duration_ms: 61.693 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_apples_to_my_trader_joe_s_list-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_apples_to_my_trader_joe_s_list-4.yaml new file mode 100644 index 000000000..b10142715 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_apples_to_my_trader_joe_s_list-4.yaml @@ -0,0 +1,294 @@ +--- +uuid: 84639ddc-dae9-410d-8f12-7ecc9d70ca3c +task_id: dom1_pl_todo-add_apples_to_my_trader_joe_s_list-4 +model_id: gemma-3-27b-it +category: todo +task: + input_text: add apples to my trader joe's list + expect_changes: + todo.trader_joe_s: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.trader_joe_s: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: add apples to my trader joe's list + context: + id: 01KHJQW9J6C4J5084BYQ88DTWB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:12.102383+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:12.104301+00:00 + - role: user + content: add apples to my trader joe's list + attachments: null + created: 2026-02-16 08:08:12.102460+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d862980>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:12.104314+00:00 + duration_ms: 68.903 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_personal_tasks-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_personal_tasks-0.yaml new file mode 100644 index 000000000..62d57670f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_personal_tasks-0.yaml @@ -0,0 +1,294 @@ +--- +uuid: 9a3c2544-5203-4a58-afe7-4badb883e7bc +task_id: dom1_pl_todo-add_history_homework_to_my_personal_tasks-0 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my personal tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my personal tasks + context: + id: 01KHJQWH83W7J38PAJJV40PXDD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:19.971630+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:19.973444+00:00 + - role: user + content: Add history homework to my personal tasks + attachments: null + created: 2026-02-16 08:08:19.971706+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d94f320>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:19.973458+00:00 + duration_ms: 57.304 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_personal_tasks-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_personal_tasks-1.yaml new file mode 100644 index 000000000..747e7d2ec --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_personal_tasks-1.yaml @@ -0,0 +1,294 @@ +--- +uuid: 4f3e7ad7-86c3-4680-ae57-0e4ac17906c4 +task_id: dom1_pl_todo-add_history_homework_to_my_personal_tasks-1 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my personal tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my personal tasks + context: + id: 01KHJQWJ20WC4ARD79ZP2P423P + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:20.801042+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:20.802868+00:00 + - role: user + content: Add history homework to my personal tasks + attachments: null + created: 2026-02-16 08:08:20.801118+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d93a090>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:20.802882+00:00 + duration_ms: 157.903 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_personal_tasks-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_personal_tasks-2.yaml new file mode 100644 index 000000000..92eaca4e1 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_personal_tasks-2.yaml @@ -0,0 +1,294 @@ +--- +uuid: 85596ce1-1ccb-4eda-b1ef-7e2334b741ab +task_id: dom1_pl_todo-add_history_homework_to_my_personal_tasks-2 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my personal tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my personal tasks + context: + id: 01KHJQWJFNVR7F4PH65YFT1259 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:21.237167+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:21.240992+00:00 + - role: user + content: Add history homework to my personal tasks + attachments: null + created: 2026-02-16 08:08:21.237282+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dcde400>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:21.241007+00:00 + duration_ms: 63.309 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_personal_tasks-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_personal_tasks-3.yaml new file mode 100644 index 000000000..67a91aace --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_personal_tasks-3.yaml @@ -0,0 +1,294 @@ +--- +uuid: 59eae72a-047a-4f33-a5a0-070f6b0a8ca5 +task_id: dom1_pl_todo-add_history_homework_to_my_personal_tasks-3 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my personal tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my personal tasks + context: + id: 01KHJQWJVHKVSWFFWYXVN1SCQ9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:21.617428+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:21.619339+00:00 + - role: user + content: Add history homework to my personal tasks + attachments: null + created: 2026-02-16 08:08:21.617507+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ddc7a00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:21.619354+00:00 + duration_ms: 64.471 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_personal_tasks-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_personal_tasks-4.yaml new file mode 100644 index 000000000..93904d16b --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_personal_tasks-4.yaml @@ -0,0 +1,294 @@ +--- +uuid: 66ac2686-afd5-4852-9091-e86845a726e5 +task_id: dom1_pl_todo-add_history_homework_to_my_personal_tasks-4 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my personal tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my personal tasks + context: + id: 01KHJQWK60QNB68TTT2VHY4XJJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:21.952114+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:21.953951+00:00 + - role: user + content: Add history homework to my personal tasks + attachments: null + created: 2026-02-16 08:08:21.952186+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d82cd50>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:21.953964+00:00 + duration_ms: 68.961 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_tasks-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_tasks-0.yaml new file mode 100644 index 000000000..83f468eff --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_tasks-0.yaml @@ -0,0 +1,294 @@ +--- +uuid: 4efe0428-e69b-46a1-bf11-b535373d3443 +task_id: dom1_pl_todo-add_history_homework_to_my_tasks-0 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my tasks + context: + id: 01KHJQWKGN52W8HDX8MA6ATRVX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:22.293766+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:22.295619+00:00 + - role: user + content: Add history homework to my tasks + attachments: null + created: 2026-02-16 08:08:22.293842+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c618d50>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:22.295633+00:00 + duration_ms: 156.048 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_tasks-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_tasks-1.yaml new file mode 100644 index 000000000..e5e99ca86 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_tasks-1.yaml @@ -0,0 +1,294 @@ +--- +uuid: 267bece7-921e-4ecd-81b2-c4842eb3f0ec +task_id: dom1_pl_todo-add_history_homework_to_my_tasks-1 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my tasks + context: + id: 01KHJQWKYJV06DZRTCNKNT3MBW + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:22.738981+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:22.741421+00:00 + - role: user + content: Add history homework to my tasks + attachments: null + created: 2026-02-16 08:08:22.739061+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d3494e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:22.741436+00:00 + duration_ms: 157.901 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_tasks-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_tasks-2.yaml new file mode 100644 index 000000000..e3867d427 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_tasks-2.yaml @@ -0,0 +1,294 @@ +--- +uuid: caaab6d4-a4af-4ae1-9095-50f6a7c9f13d +task_id: dom1_pl_todo-add_history_homework_to_my_tasks-2 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my tasks + context: + id: 01KHJQWMBGHHSRFZF28N5YFXKF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:23.152489+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:23.155490+00:00 + - role: user + content: Add history homework to my tasks + attachments: null + created: 2026-02-16 08:08:23.152566+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17db1ee50>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:23.155505+00:00 + duration_ms: 90.57 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_tasks-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_tasks-3.yaml new file mode 100644 index 000000000..d3498d5b2 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_tasks-3.yaml @@ -0,0 +1,294 @@ +--- +uuid: dc917a36-871d-4db8-8ad2-9f20f6a4d30e +task_id: dom1_pl_todo-add_history_homework_to_my_tasks-3 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my tasks + context: + id: 01KHJQWMQJ1EK1DE0K4HJFFNRJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:23.538366+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:23.540249+00:00 + - role: user + content: Add history homework to my tasks + attachments: null + created: 2026-02-16 08:08:23.538446+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d7c05c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:23.540264+00:00 + duration_ms: 61.883 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_tasks-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_tasks-4.yaml new file mode 100644 index 000000000..dbdfaf01f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-add_history_homework_to_my_tasks-4.yaml @@ -0,0 +1,294 @@ +--- +uuid: 261874d7-3029-488b-8c45-15440a6a4a2b +task_id: dom1_pl_todo-add_history_homework_to_my_tasks-4 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Add history homework to my tasks + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Add history homework to my tasks + context: + id: 01KHJQWN1Q9MWDKFMAVHHMKQEC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:23.864062+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:23.865921+00:00 + - role: user + content: Add history homework to my tasks + attachments: null + created: 2026-02-16 08:08:23.864138+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ddc62a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:23.865935+00:00 + duration_ms: 61.706 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_shopping_list-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_shopping_list-0.yaml new file mode 100644 index 000000000..dfa91e2b4 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_shopping_list-0.yaml @@ -0,0 +1,294 @@ +--- +uuid: 1d2bc484-f2d1-45b6-be0c-749deaac06ca +task_id: dom1_pl_todo-put_apples_on_the_shopping_list-0 +model_id: gemma-3-27b-it +category: todo +task: + input_text: put apples on the shopping list + expect_changes: + todo.trader_joe_s: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.trader_joe_s: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: put apples on the shopping list + context: + id: 01KHJQWBFR36QS14VHFETXQFKK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:14.072493+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:14.084127+00:00 + - role: user + content: put apples on the shopping list + attachments: null + created: 2026-02-16 08:08:14.072569+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17de549e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:14.084147+00:00 + duration_ms: 69.28 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_shopping_list-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_shopping_list-1.yaml new file mode 100644 index 000000000..d163eec70 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_shopping_list-1.yaml @@ -0,0 +1,294 @@ +--- +uuid: 06316805-2b6a-407f-8231-46a56c507f05 +task_id: dom1_pl_todo-put_apples_on_the_shopping_list-1 +model_id: gemma-3-27b-it +category: todo +task: + input_text: put apples on the shopping list + expect_changes: + todo.trader_joe_s: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.trader_joe_s: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: put apples on the shopping list + context: + id: 01KHJQWBSB1Y0X67XX2XSKW6TK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:14.379148+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:14.381010+00:00 + - role: user + content: put apples on the shopping list + attachments: null + created: 2026-02-16 08:08:14.379249+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d9740f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:14.381023+00:00 + duration_ms: 58.891 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_shopping_list-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_shopping_list-2.yaml new file mode 100644 index 000000000..a14134390 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_shopping_list-2.yaml @@ -0,0 +1,294 @@ +--- +uuid: 6a4c839a-a6eb-4fdc-97bd-4cbd1867b0e7 +task_id: dom1_pl_todo-put_apples_on_the_shopping_list-2 +model_id: gemma-3-27b-it +category: todo +task: + input_text: put apples on the shopping list + expect_changes: + todo.trader_joe_s: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.trader_joe_s: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: put apples on the shopping list + context: + id: 01KHJQWC2XSZ2E1J9800F83ERH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:14.685946+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:14.687792+00:00 + - role: user + content: put apples on the shopping list + attachments: null + created: 2026-02-16 08:08:14.686025+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16dd4d6f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:14.687805+00:00 + duration_ms: 59.747 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_shopping_list-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_shopping_list-3.yaml new file mode 100644 index 000000000..c5c5f2aa7 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_shopping_list-3.yaml @@ -0,0 +1,294 @@ +--- +uuid: d223df62-255f-4862-a39d-3841ba339e05 +task_id: dom1_pl_todo-put_apples_on_the_shopping_list-3 +model_id: gemma-3-27b-it +category: todo +task: + input_text: put apples on the shopping list + expect_changes: + todo.trader_joe_s: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.trader_joe_s: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: put apples on the shopping list + context: + id: 01KHJQWCDHZ46G1SBPPV1XZJA0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:15.025143+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:15.030552+00:00 + - role: user + content: put apples on the shopping list + attachments: null + created: 2026-02-16 08:08:15.025219+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dfd7690>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:15.030566+00:00 + duration_ms: 65.687 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_shopping_list-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_shopping_list-4.yaml new file mode 100644 index 000000000..ccd65982d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_shopping_list-4.yaml @@ -0,0 +1,294 @@ +--- +uuid: e8b722bd-0745-456b-9808-6b759c6cb6d7 +task_id: dom1_pl_todo-put_apples_on_the_shopping_list-4 +model_id: gemma-3-27b-it +category: todo +task: + input_text: put apples on the shopping list + expect_changes: + todo.trader_joe_s: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.trader_joe_s: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: put apples on the shopping list + context: + id: 01KHJQWCS642ECR3BB2XP5A4BX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:15.398543+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:15.405871+00:00 + - role: user + content: put apples on the shopping list + attachments: null + created: 2026-02-16 08:08:15.398618+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d939f30>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:15.405889+00:00 + duration_ms: 158.998 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_trader_joes_list-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_trader_joes_list-0.yaml new file mode 100644 index 000000000..772381286 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_trader_joes_list-0.yaml @@ -0,0 +1,294 @@ +--- +uuid: 6999ec3a-5f78-4e1f-9334-4f907f211f43 +task_id: dom1_pl_todo-put_apples_on_the_trader_joes_list-0 +model_id: gemma-3-27b-it +category: todo +task: + input_text: put apples on the trader joes list + expect_changes: + todo.trader_joe_s: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.trader_joe_s: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: put apples on the trader joes list + context: + id: 01KHJQW9WEQ4GSGGR51BGQRH99 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:12.430964+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:12.432850+00:00 + - role: user + content: put apples on the trader joes list + attachments: null + created: 2026-02-16 08:08:12.431042+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c7d40f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:12.432863+00:00 + duration_ms: 61.987 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_trader_joes_list-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_trader_joes_list-1.yaml new file mode 100644 index 000000000..7c0569ed0 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_trader_joes_list-1.yaml @@ -0,0 +1,294 @@ +--- +uuid: 845e97bf-8679-43be-a765-dcd0b7efb382 +task_id: dom1_pl_todo-put_apples_on_the_trader_joes_list-1 +model_id: gemma-3-27b-it +category: todo +task: + input_text: put apples on the trader joes list + expect_changes: + todo.trader_joe_s: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.trader_joe_s: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: put apples on the trader joes list + context: + id: 01KHJQWA5RB42X2RXRAY8SC474 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:12.729097+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:12.730922+00:00 + - role: user + content: put apples on the trader joes list + attachments: null + created: 2026-02-16 08:08:12.729175+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d7fbc10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:12.730935+00:00 + duration_ms: 60.765 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_trader_joes_list-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_trader_joes_list-2.yaml new file mode 100644 index 000000000..208be226f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_trader_joes_list-2.yaml @@ -0,0 +1,294 @@ +--- +uuid: 02b842e3-e4c4-43aa-9d8f-26ffe75d1096 +task_id: dom1_pl_todo-put_apples_on_the_trader_joes_list-2 +model_id: gemma-3-27b-it +category: todo +task: + input_text: put apples on the trader joes list + expect_changes: + todo.trader_joe_s: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.trader_joe_s: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: put apples on the trader joes list + context: + id: 01KHJQWAG3GM478ZV6G4HZ2PWR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:13.059906+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:13.062280+00:00 + - role: user + content: put apples on the trader joes list + attachments: null + created: 2026-02-16 08:08:13.059984+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dd73480>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:13.062293+00:00 + duration_ms: 68.061 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_trader_joes_list-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_trader_joes_list-3.yaml new file mode 100644 index 000000000..af0cb0d43 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_trader_joes_list-3.yaml @@ -0,0 +1,294 @@ +--- +uuid: 003bdc74-fb63-48e9-b04c-64185e946b47 +task_id: dom1_pl_todo-put_apples_on_the_trader_joes_list-3 +model_id: gemma-3-27b-it +category: todo +task: + input_text: put apples on the trader joes list + expect_changes: + todo.trader_joe_s: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.trader_joe_s: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: put apples on the trader joes list + context: + id: 01KHJQWATFY2NCVPE8VHCHC619 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:13.391578+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:13.393480+00:00 + - role: user + content: put apples on the trader joes list + attachments: null + created: 2026-02-16 08:08:13.391652+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dfa3a00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:13.393493+00:00 + duration_ms: 68.096 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_trader_joes_list-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_trader_joes_list-4.yaml new file mode 100644 index 000000000..fd93317b3 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_apples_on_the_trader_joes_list-4.yaml @@ -0,0 +1,294 @@ +--- +uuid: 793d419a-c17c-4104-848e-d9d523803392 +task_id: dom1_pl_todo-put_apples_on_the_trader_joes_list-4 +model_id: gemma-3-27b-it +category: todo +task: + input_text: put apples on the trader joes list + expect_changes: + todo.trader_joe_s: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.trader_joe_s: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: put apples on the trader joes list + context: + id: 01KHJQWB522YSN24K5HK8E6S3Q + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:13.730690+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:13.732474+00:00 + - role: user + content: put apples on the trader joes list + attachments: null + created: 2026-02-16 08:08:13.730763+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c74fed0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:13.732488+00:00 + duration_ms: 59.899 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-0.yaml new file mode 100644 index 000000000..a66819fa0 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-0.yaml @@ -0,0 +1,294 @@ +--- +uuid: a5701567-1fe1-437b-a2fd-e2d123d8c7a0 +task_id: dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-0 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put clean the kitchen on personal tasks list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put clean the kitchen on personal tasks list + context: + id: 01KHJQWFBB8VKHGF6X6Z0E2PXQ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:18.027997+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:18.029928+00:00 + - role: user + content: Put clean the kitchen on personal tasks list + attachments: null + created: 2026-02-16 08:08:18.028072+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dcdbd70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:18.029943+00:00 + duration_ms: 139.452 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-1.yaml new file mode 100644 index 000000000..758dab11f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-1.yaml @@ -0,0 +1,294 @@ +--- +uuid: 06cd826b-1bfd-4363-9ead-2783fdca17a4 +task_id: dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-1 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put clean the kitchen on personal tasks list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put clean the kitchen on personal tasks list + context: + id: 01KHJQWFR6ZSGHBGW3BZ2A0CZE + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:18.438221+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:18.465092+00:00 + - role: user + content: Put clean the kitchen on personal tasks list + attachments: null + created: 2026-02-16 08:08:18.438340+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16dea3e20>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:18.465112+00:00 + duration_ms: 201.697 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-2.yaml new file mode 100644 index 000000000..ee5f4f004 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-2.yaml @@ -0,0 +1,294 @@ +--- +uuid: ff1abe02-8251-43d5-ab65-1807f6affdc1 +task_id: dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-2 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put clean the kitchen on personal tasks list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put clean the kitchen on personal tasks list + context: + id: 01KHJQWG6Y5N42CDEWBGTJWWFF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:18.910770+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:18.912625+00:00 + - role: user + content: Put clean the kitchen on personal tasks list + attachments: null + created: 2026-02-16 08:08:18.910845+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d2bb740>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:18.912640+00:00 + duration_ms: 163.203 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-3.yaml new file mode 100644 index 000000000..cf61ca3e1 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-3.yaml @@ -0,0 +1,294 @@ +--- +uuid: d160ce48-db45-4582-b34e-d71557320647 +task_id: dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-3 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put clean the kitchen on personal tasks list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put clean the kitchen on personal tasks list + context: + id: 01KHJQWGKSA3JTD8R51JJRA7Z1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:19.321309+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:19.323132+00:00 + - role: user + content: Put clean the kitchen on personal tasks list + attachments: null + created: 2026-02-16 08:08:19.321387+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17de062a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:19.323145+00:00 + duration_ms: 61.411 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-4.yaml new file mode 100644 index 000000000..47b1ca104 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-4.yaml @@ -0,0 +1,294 @@ +--- +uuid: 27b74b57-83c8-4757-b0bb-1a64e1d3618a +task_id: dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-4 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put clean the kitchen on personal tasks list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put clean the kitchen on personal tasks list + context: + id: 01KHJQWGY4BFSWZATVA03394E7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:19.652434+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:19.654308+00:00 + - role: user + content: Put clean the kitchen on personal tasks list + attachments: null + created: 2026-02-16 08:08:19.652511+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16de0fd70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:19.654321+00:00 + duration_ms: 58.534 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_my_task_list-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_my_task_list-0.yaml new file mode 100644 index 000000000..893bcc28b --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_my_task_list-0.yaml @@ -0,0 +1,294 @@ +--- +uuid: 23f8aff3-afb8-486f-a39a-0f4d2d3a1379 +task_id: dom1_pl_todo-put_history_homework_on_my_task_list-0 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put history homework on my task list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put history homework on my task list + context: + id: 01KHJQWNBWFGB9E6EE7HXHFJEW + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:24.188377+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:24.190212+00:00 + - role: user + content: Put history homework on my task list + attachments: null + created: 2026-02-16 08:08:24.188455+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d636090>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:24.190238+00:00 + duration_ms: 60.057 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_my_task_list-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_my_task_list-1.yaml new file mode 100644 index 000000000..2a16bee0e --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_my_task_list-1.yaml @@ -0,0 +1,294 @@ +--- +uuid: 32d3adb4-0ed7-4bec-8a32-ba56a844fac4 +task_id: dom1_pl_todo-put_history_homework_on_my_task_list-1 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put history homework on my task list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put history homework on my task list + context: + id: 01KHJQWNP9FJ3Z3M3NKRKH2H8Q + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:24.521327+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:24.523708+00:00 + - role: user + content: Put history homework on my task list + attachments: null + created: 2026-02-16 08:08:24.521403+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ff0d170>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:24.523721+00:00 + duration_ms: 147.002 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_my_task_list-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_my_task_list-2.yaml new file mode 100644 index 000000000..0e50455bc --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_my_task_list-2.yaml @@ -0,0 +1,294 @@ +--- +uuid: a46061ab-d43c-49ee-99f0-02c7cd9be0fb +task_id: dom1_pl_todo-put_history_homework_on_my_task_list-2 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put history homework on my task list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put history homework on my task list + context: + id: 01KHJQWP43N77X927V4VW3PE18 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:24.963500+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:24.990042+00:00 + - role: user + content: Put history homework on my task list + attachments: null + created: 2026-02-16 08:08:24.963575+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d3bada0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:24.990063+00:00 + duration_ms: 97.147 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_my_task_list-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_my_task_list-3.yaml new file mode 100644 index 000000000..f7978f7c1 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_my_task_list-3.yaml @@ -0,0 +1,294 @@ +--- +uuid: 76975ba4-7f32-47d1-9e76-1c28a17e9c95 +task_id: dom1_pl_todo-put_history_homework_on_my_task_list-3 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put history homework on my task list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put history homework on my task list + context: + id: 01KHJQWPFA0DZC3SDHE93P5A9A + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:25.322549+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:25.324415+00:00 + - role: user + content: Put history homework on my task list + attachments: null + created: 2026-02-16 08:08:25.322627+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16de0c250>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:25.324429+00:00 + duration_ms: 62.219 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_my_task_list-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_my_task_list-4.yaml new file mode 100644 index 000000000..10fefc2b0 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_my_task_list-4.yaml @@ -0,0 +1,294 @@ +--- +uuid: 3edc06d2-589d-4751-9b59-4820940b3a8d +task_id: dom1_pl_todo-put_history_homework_on_my_task_list-4 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put history homework on my task list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put history homework on my task list + context: + id: 01KHJQWPRWFMND188NEZ45PHPV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:25.628838+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:25.630642+00:00 + - role: user + content: Put history homework on my task list + attachments: null + created: 2026-02-16 08:08:25.628916+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fff3950>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:25.630655+00:00 + duration_ms: 145.399 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_personal_tasks_list-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_personal_tasks_list-0.yaml new file mode 100644 index 000000000..d766cf3f0 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_personal_tasks_list-0.yaml @@ -0,0 +1,294 @@ +--- +uuid: f87dcd69-8bd2-4cab-b2ce-220fdb12af75 +task_id: dom1_pl_todo-put_history_homework_on_personal_tasks_list-0 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put history homework on personal tasks list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put history homework on personal tasks list + context: + id: 01KHJQWD6WTWEKGPT0WVEXS7E0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:15.837049+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:15.838954+00:00 + - role: user + content: Put history homework on personal tasks list + attachments: null + created: 2026-02-16 08:08:15.837124+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d0f24b0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:15.838969+00:00 + duration_ms: 64.015 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_personal_tasks_list-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_personal_tasks_list-1.yaml new file mode 100644 index 000000000..e31242761 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_personal_tasks_list-1.yaml @@ -0,0 +1,294 @@ +--- +uuid: 830b9dbf-5f6a-4eb2-b279-f95da04cc830 +task_id: dom1_pl_todo-put_history_homework_on_personal_tasks_list-1 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put history homework on personal tasks list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put history homework on personal tasks list + context: + id: 01KHJQWDGWF58ZBMV72MS1PSMH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:16.156328+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:16.159979+00:00 + - role: user + content: Put history homework on personal tasks list + attachments: null + created: 2026-02-16 08:08:16.156404+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17da7d0c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:16.159993+00:00 + duration_ms: 63.995 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_personal_tasks_list-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_personal_tasks_list-2.yaml new file mode 100644 index 000000000..5c44a048a --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_personal_tasks_list-2.yaml @@ -0,0 +1,294 @@ +--- +uuid: fc263107-7488-4146-9f42-2b9dfab88b2c +task_id: dom1_pl_todo-put_history_homework_on_personal_tasks_list-2 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put history homework on personal tasks list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put history homework on personal tasks list + context: + id: 01KHJQWE7EJ8HJ4R25TWA3X8YF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:16.878668+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:16.880551+00:00 + - role: user + content: Put history homework on personal tasks list + attachments: null + created: 2026-02-16 08:08:16.878746+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d623d70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:16.880565+00:00 + duration_ms: 60.799 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_personal_tasks_list-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_personal_tasks_list-3.yaml new file mode 100644 index 000000000..fb50823d4 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_personal_tasks_list-3.yaml @@ -0,0 +1,294 @@ +--- +uuid: 4265ef93-af79-4adc-802a-6f6df2f25872 +task_id: dom1_pl_todo-put_history_homework_on_personal_tasks_list-3 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put history homework on personal tasks list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put history homework on personal tasks list + context: + id: 01KHJQWEGT69NR8RC59Y0VKWAD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:17.178824+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:17.183742+00:00 + - role: user + content: Put history homework on personal tasks list + attachments: null + created: 2026-02-16 08:08:17.178897+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17de9c720>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:17.183757+00:00 + duration_ms: 173.483 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_personal_tasks_list-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_personal_tasks_list-4.yaml new file mode 100644 index 000000000..7eacff1b2 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_history_homework_on_personal_tasks_list-4.yaml @@ -0,0 +1,294 @@ +--- +uuid: 1faad11a-0095-44bd-b6a4-56af589b17a7 +task_id: dom1_pl_todo-put_history_homework_on_personal_tasks_list-4 +model_id: gemma-3-27b-it +category: todo +task: + input_text: Put history homework on personal tasks list + expect_changes: + todo.personal_tasks: + state: '1' + attributes: null +response: Error talking to API +context: + unexpected_states: + todo.personal_tasks: + expected: + state: '1' + got: + state: '0' + conversation_trace: + - event_type: async_process + data: + text: Put history homework on personal tasks list + context: + id: 01KHJQWEYFYYN4S1XQWG4VYFFK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:17.615901+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:17.622552+00:00 + - role: user + content: Put history homework on personal tasks list + attachments: null + created: 2026-02-16 08:08:17.615979+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d849900>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:17.622570+00:00 + duration_ms: 144.762 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_milk_on_the_list-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_milk_on_the_list-0.yaml new file mode 100644 index 000000000..0e89aada4 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_milk_on_the_list-0.yaml @@ -0,0 +1,292 @@ +--- +uuid: 9368dd6d-06cc-4d8c-85a1-ddc43f7c148d +task_id: dom1_pl_todo-put_milk_on_the_list-0 +model_id: gemma-3-27b-it +category: todo +task: + input_text: put milk on the list + expect_changes: + todo.personal_tasks: + state: '0' + attributes: null + todo.trader_joe_s: + state: '0' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: put milk on the list + context: + id: 01KHJQWQ61TCNA22D113798R04 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:26.049457+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:26.051336+00:00 + - role: user + content: put milk on the list + attachments: null + created: 2026-02-16 08:08:26.049536+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d12f950>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:26.051348+00:00 + duration_ms: 56.727 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_milk_on_the_list-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_milk_on_the_list-1.yaml new file mode 100644 index 000000000..e16250ca2 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_milk_on_the_list-1.yaml @@ -0,0 +1,292 @@ +--- +uuid: 89721ace-f4b7-42c1-aa19-0b39fe5b2cd8 +task_id: dom1_pl_todo-put_milk_on_the_list-1 +model_id: gemma-3-27b-it +category: todo +task: + input_text: put milk on the list + expect_changes: + todo.personal_tasks: + state: '0' + attributes: null + todo.trader_joe_s: + state: '0' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: put milk on the list + context: + id: 01KHJQWQGNYQC35W5Z9TDCRV3Z + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:26.389358+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:26.391272+00:00 + - role: user + content: put milk on the list + attachments: null + created: 2026-02-16 08:08:26.389437+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fe3cca0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:26.391286+00:00 + duration_ms: 58.792 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_milk_on_the_list-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_milk_on_the_list-2.yaml new file mode 100644 index 000000000..b7c118868 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_milk_on_the_list-2.yaml @@ -0,0 +1,292 @@ +--- +uuid: 2074c9ef-669c-4ba1-ad9d-796e3b21c348 +task_id: dom1_pl_todo-put_milk_on_the_list-2 +model_id: gemma-3-27b-it +category: todo +task: + input_text: put milk on the list + expect_changes: + todo.personal_tasks: + state: '0' + attributes: null + todo.trader_joe_s: + state: '0' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: put milk on the list + context: + id: 01KHJQWQVYZTX0F1Y7J5TNEN0D + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:26.750113+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:26.751938+00:00 + - role: user + content: put milk on the list + attachments: null + created: 2026-02-16 08:08:26.750190+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17ddee4b0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:26.751951+00:00 + duration_ms: 65.497 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_milk_on_the_list-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_milk_on_the_list-3.yaml new file mode 100644 index 000000000..66e984b3f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_milk_on_the_list-3.yaml @@ -0,0 +1,292 @@ +--- +uuid: 49b452c3-5b41-4c40-a0ff-c34ac1d6cd85 +task_id: dom1_pl_todo-put_milk_on_the_list-3 +model_id: gemma-3-27b-it +category: todo +task: + input_text: put milk on the list + expect_changes: + todo.personal_tasks: + state: '0' + attributes: null + todo.trader_joe_s: + state: '0' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: put milk on the list + context: + id: 01KHJQWR60GDQZ53GCVV8EYHFS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:27.072602+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:27.075831+00:00 + - role: user + content: put milk on the list + attachments: null + created: 2026-02-16 08:08:27.072681+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c792e50>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:27.075848+00:00 + duration_ms: 70.649 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_milk_on_the_list-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_milk_on_the_list-4.yaml new file mode 100644 index 000000000..fc625d146 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/dom1_pl_todo-put_milk_on_the_list-4.yaml @@ -0,0 +1,292 @@ +--- +uuid: 9be43af3-83de-44e2-952b-2dc8e78af6a6 +task_id: dom1_pl_todo-put_milk_on_the_list-4 +model_id: gemma-3-27b-it +category: todo +task: + input_text: put milk on the list + expect_changes: + todo.personal_tasks: + state: '0' + attributes: null + todo.trader_joe_s: + state: '0' + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: put milk on the list + context: + id: 01KHJQWRHT0NE9WJQWPPKPMXN1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:27.450922+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Backyard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: binary_sensor + areas: Backyard + - names: Motion Sensor Battery + domain: sensor + areas: Backyard + - names: Personal Tasks + domain: todo + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + - names: Trader Joe's + domain: todo + - names: Tv + domain: switch + areas: Living Room + - names: Tv Energy + domain: sensor + areas: Living Room + created: 2026-02-16 08:08:27.452846+00:00 + - role: user + content: put milk on the list + attachments: null + created: 2026-02-16 08:08:27.450997+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d31f320>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Personal Tasks'', "Trader Joe''s"]), ''status'': + In([''needs_action'', ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:27.452860+00:00 + duration_ms: 172.145 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-close_the_garage_door-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-close_the_garage_door-0.yaml new file mode 100644 index 000000000..7ce8e7502 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-close_the_garage_door-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: 432c3390-92a6-4889-8557-2684c255bb23 +task_id: home1_us_cover_garage-close_the_garage_door-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the garage door + expect_changes: + cover.garage_door_opener: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the garage door + context: + id: 01KHJQWRZPMW2AQ2NXZTSZTMQR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:27.894811+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:27.896703+00:00 + - role: user + content: Close the garage door + attachments: null + created: 2026-02-16 08:08:27.894885+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d6205c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:27.896717+00:00 + duration_ms: 152.437 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-close_the_garage_door-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-close_the_garage_door-1.yaml new file mode 100644 index 000000000..c9dd1867d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-close_the_garage_door-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: 7e048367-6966-4c77-9f55-401cf37df0c2 +task_id: home1_us_cover_garage-close_the_garage_door-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the garage door + expect_changes: + cover.garage_door_opener: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the garage door + context: + id: 01KHJQWSCY895KYVB0VZA8MZX8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:28.319013+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:28.320860+00:00 + - role: user + content: Close the garage door + attachments: null + created: 2026-02-16 08:08:28.319086+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d332610>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:28.320873+00:00 + duration_ms: 60.807 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-close_the_garage_door-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-close_the_garage_door-2.yaml new file mode 100644 index 000000000..18f1678a9 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-close_the_garage_door-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: 4116f905-d6c7-4858-832d-9d5fd86aaa1b +task_id: home1_us_cover_garage-close_the_garage_door-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the garage door + expect_changes: + cover.garage_door_opener: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the garage door + context: + id: 01KHJQWSR4K732HBBNCQDNXFA5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:28.676125+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:28.678045+00:00 + - role: user + content: Close the garage door + attachments: null + created: 2026-02-16 08:08:28.676200+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ddc4880>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:28.678058+00:00 + duration_ms: 149.106 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-close_the_garage_door-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-close_the_garage_door-3.yaml new file mode 100644 index 000000000..0693dfe05 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-close_the_garage_door-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: 3245d725-444c-40b9-8348-34d589f94d39 +task_id: home1_us_cover_garage-close_the_garage_door-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the garage door + expect_changes: + cover.garage_door_opener: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the garage door + context: + id: 01KHJQWTCB4Y56GXF7XRYC7JPR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:29.323766+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:29.325692+00:00 + - role: user + content: Close the garage door + attachments: null + created: 2026-02-16 08:08:29.323844+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d8befb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:29.325704+00:00 + duration_ms: 66.051 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-close_the_garage_door-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-close_the_garage_door-4.yaml new file mode 100644 index 000000000..72361b713 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-close_the_garage_door-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: a9aeee57-21c3-491a-a4b0-4ed4f0bdb9d3 +task_id: home1_us_cover_garage-close_the_garage_door-4 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the garage door + expect_changes: + cover.garage_door_opener: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the garage door + context: + id: 01KHJQWTQG5SRGMKYF3E8MJADV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:29.680788+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:29.682723+00:00 + - role: user + content: Close the garage door + attachments: null + created: 2026-02-16 08:08:29.680865+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d3097a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:29.682737+00:00 + duration_ms: 157.282 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-open_the_garage_door-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-open_the_garage_door-0.yaml new file mode 100644 index 000000000..c6a4eb0f1 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-open_the_garage_door-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: e0b6cb02-16c8-47d7-9444-07298cdb6d91 +task_id: home1_us_cover_garage-open_the_garage_door-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the garage door + context: + id: 01KHJQWWZW3GC8RN3ZXMNWZJ2A + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:31.996501+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:31.998439+00:00 + - role: user + content: Open the garage door + attachments: null + created: 2026-02-16 08:08:31.996579+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17db6b740>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:31.998453+00:00 + duration_ms: 58.164 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-open_the_garage_door-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-open_the_garage_door-1.yaml new file mode 100644 index 000000000..8617ef70c --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-open_the_garage_door-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: 1a32e57e-0267-4a0b-b23b-febc9a008603 +task_id: home1_us_cover_garage-open_the_garage_door-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the garage door + context: + id: 01KHJQWXA0RV3TJVSBASR9M1Z6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:32.320849+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:32.322723+00:00 + - role: user + content: Open the garage door + attachments: null + created: 2026-02-16 08:08:32.320923+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17de53060>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:32.322736+00:00 + duration_ms: 62.257 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-open_the_garage_door-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-open_the_garage_door-2.yaml new file mode 100644 index 000000000..ac4687ec0 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-open_the_garage_door-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: 866d83dd-6654-421c-af77-cd0a253b240b +task_id: home1_us_cover_garage-open_the_garage_door-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the garage door + context: + id: 01KHJQWXMGCTQ7ZT0S44M6ZT93 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:32.656693+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:32.658571+00:00 + - role: user + content: Open the garage door + attachments: null + created: 2026-02-16 08:08:32.656768+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c896980>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:32.658583+00:00 + duration_ms: 154.273 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-open_the_garage_door-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-open_the_garage_door-3.yaml new file mode 100644 index 000000000..37810a1fd --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-open_the_garage_door-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: 8de9c602-35b1-41c2-8d37-23fd58e9b534 +task_id: home1_us_cover_garage-open_the_garage_door-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the garage door + context: + id: 01KHJQWY22SES1G002X0VYKEC3 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:33.090519+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:33.094451+00:00 + - role: user + content: Open the garage door + attachments: null + created: 2026-02-16 08:08:33.090597+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17daa7ab0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:33.094466+00:00 + duration_ms: 148.602 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-open_the_garage_door-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-open_the_garage_door-4.yaml new file mode 100644 index 000000000..50c7676b6 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-open_the_garage_door-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: d2022b49-eef8-40c6-8d86-f2ec302ccfd6 +task_id: home1_us_cover_garage-open_the_garage_door-4 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the garage door + context: + id: 01KHJQWYFY0KNXF2ZWQS3CM5KA + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:33.534155+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:33.545921+00:00 + - role: user + content: Open the garage door + attachments: null + created: 2026-02-16 08:08:33.534254+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dd7a770>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:33.545943+00:00 + duration_ms: 156.159 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-please_open_the_garage_door-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-please_open_the_garage_door-0.yaml new file mode 100644 index 000000000..a81eceb3f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-please_open_the_garage_door-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: 9f248a71-255f-47df-93ba-c03f13a26efd +task_id: home1_us_cover_garage-please_open_the_garage_door-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Please open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Please open the garage door + context: + id: 01KHJQWV4SCAK1NPZGHNCYRKSQ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:30.105774+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:30.107701+00:00 + - role: user + content: Please open the garage door + attachments: null + created: 2026-02-16 08:08:30.105850+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c8e1dd0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:30.107714+00:00 + duration_ms: 69.857 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-please_open_the_garage_door-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-please_open_the_garage_door-1.yaml new file mode 100644 index 000000000..a421bd073 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-please_open_the_garage_door-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: 2c0147c1-d2b6-4fbf-9050-2262cf00dd36 +task_id: home1_us_cover_garage-please_open_the_garage_door-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Please open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Please open the garage door + context: + id: 01KHJQWVF7491X4YCKA5VPDQ6E + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:30.439722+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:30.441927+00:00 + - role: user + content: Please open the garage door + attachments: null + created: 2026-02-16 08:08:30.439797+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d8ac510>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:30.441941+00:00 + duration_ms: 146.84 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-please_open_the_garage_door-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-please_open_the_garage_door-2.yaml new file mode 100644 index 000000000..7a117576c --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-please_open_the_garage_door-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: b22eb114-f88b-440c-b7b4-16c8644b77d7 +task_id: home1_us_cover_garage-please_open_the_garage_door-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Please open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Please open the garage door + context: + id: 01KHJQWVVW0H9G9A0D1K2FKDDP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:30.844798+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:30.851475+00:00 + - role: user + content: Please open the garage door + attachments: null + created: 2026-02-16 08:08:30.844873+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c7d6b90>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:30.851492+00:00 + duration_ms: 67.291 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-please_open_the_garage_door-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-please_open_the_garage_door-3.yaml new file mode 100644 index 000000000..6ba933872 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-please_open_the_garage_door-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: c4bef9fa-4e33-4e17-914e-479dd8626396 +task_id: home1_us_cover_garage-please_open_the_garage_door-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Please open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Please open the garage door + context: + id: 01KHJQWW7103HHGK8A4VAW9F1F + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:31.201871+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:31.205563+00:00 + - role: user + content: Please open the garage door + attachments: null + created: 2026-02-16 08:08:31.201951+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16dd95430>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:31.205578+00:00 + duration_ms: 69.968 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-please_open_the_garage_door-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-please_open_the_garage_door-4.yaml new file mode 100644 index 000000000..3d44d835c --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_cover_garage-please_open_the_garage_door-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: 8b64a377-b022-4c23-9d09-ada045db26f2 +task_id: home1_us_cover_garage-please_open_the_garage_door-4 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Please open the garage door + expect_changes: + cover.garage_door_opener: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.garage_door_opener: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Please open the garage door + context: + id: 01KHJQWWJGGJ280P2Q9VESB6FW + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:31.568787+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:31.572343+00:00 + - role: user + content: Please open the garage door + attachments: null + created: 2026-02-16 08:08:31.568863+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fe3f270>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:31.572357+00:00 + duration_ms: 148.252 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_media_player-set_the_volume_to_0-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_media_player-set_the_volume_to_0-0.yaml new file mode 100644 index 000000000..171422c63 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_media_player-set_the_volume_to_0-0.yaml @@ -0,0 +1,285 @@ +--- +uuid: 411ede8b-69ec-4bc6-8f58-685f05fe3efa +task_id: home1_us_media_player-set_the_volume_to_0-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Set the volume to 0% +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Set the volume to 0% + context: + id: 01KHJQX2E8EWW9FP01W0XJPKGT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:37.576622+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:37.580144+00:00 + - role: user + content: Set the volume to 0% + attachments: null + created: 2026-02-16 08:08:37.576700+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c64ceb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:37.580158+00:00 + duration_ms: 173.093 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_media_player-set_the_volume_to_0-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_media_player-set_the_volume_to_0-1.yaml new file mode 100644 index 000000000..bdea7a1d1 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_media_player-set_the_volume_to_0-1.yaml @@ -0,0 +1,285 @@ +--- +uuid: 5393383c-80fc-4ff3-bb29-ea396fe9b8e7 +task_id: home1_us_media_player-set_the_volume_to_0-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Set the volume to 0% +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Set the volume to 0% + context: + id: 01KHJQX2W3ZGAS3656FSMWNGWK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:38.019533+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:38.022973+00:00 + - role: user + content: Set the volume to 0% + attachments: null + created: 2026-02-16 08:08:38.019609+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c7ab110>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:38.022988+00:00 + duration_ms: 147.319 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_media_player-set_the_volume_to_0-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_media_player-set_the_volume_to_0-2.yaml new file mode 100644 index 000000000..ad3da9f5e --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_media_player-set_the_volume_to_0-2.yaml @@ -0,0 +1,285 @@ +--- +uuid: fa34fec4-a7bd-415f-b51e-13aad53d972f +task_id: home1_us_media_player-set_the_volume_to_0-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Set the volume to 0% +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Set the volume to 0% + context: + id: 01KHJQX39X07J371AFMQWPG8KF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:38.461710+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:38.463662+00:00 + - role: user + content: Set the volume to 0% + attachments: null + created: 2026-02-16 08:08:38.461790+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d2a2350>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:38.463675+00:00 + duration_ms: 65.425 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_media_player-set_the_volume_to_0-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_media_player-set_the_volume_to_0-3.yaml new file mode 100644 index 000000000..3bf8a146d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_media_player-set_the_volume_to_0-3.yaml @@ -0,0 +1,285 @@ +--- +uuid: d93ce0ef-6222-485b-9e1a-e1638b4d0b90 +task_id: home1_us_media_player-set_the_volume_to_0-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Set the volume to 0% +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Set the volume to 0% + context: + id: 01KHJQX3KQQX9SYSE266JFBB9B + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:38.775814+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:38.778243+00:00 + - role: user + content: Set the volume to 0% + attachments: null + created: 2026-02-16 08:08:38.775887+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17db7f060>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:38.778256+00:00 + duration_ms: 140.148 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_media_player-set_the_volume_to_0-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_media_player-set_the_volume_to_0-4.yaml new file mode 100644 index 000000000..33c215a9f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_media_player-set_the_volume_to_0-4.yaml @@ -0,0 +1,285 @@ +--- +uuid: 875212da-a280-4239-b32d-b04c8f5b9f2a +task_id: home1_us_media_player-set_the_volume_to_0-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Set the volume to 0% +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Set the volume to 0% + context: + id: 01KHJQX40T9GPEZDFCWP64FQXG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:39.194475+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:39.201443+00:00 + - role: user + content: Set the volume to 0% + attachments: null + created: 2026-02-16 08:08:39.194557+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17db8d2d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:39.201460+00:00 + duration_ms: 152.949 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_all_the_locks_please-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_all_the_locks_please-0.yaml new file mode 100644 index 000000000..2157221fd --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_all_the_locks_please-0.yaml @@ -0,0 +1,293 @@ +--- +uuid: a68e610a-0825-44d9-acfe-c3d00916247a +task_id: home1_us_smart_lock-lock_all_the_locks_please-0 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock all the locks please + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null + lock.rear_door_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Lock all the locks please + context: + id: 01KHJQXD7ZJWYK1ADHM0VFZ70Y + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:48.640012+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:48.641842+00:00 + - role: user + content: Lock all the locks please + attachments: null + created: 2026-02-16 08:08:48.640085+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16de0c880>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:48.641856+00:00 + duration_ms: 59.171 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_all_the_locks_please-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_all_the_locks_please-1.yaml new file mode 100644 index 000000000..708f22c30 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_all_the_locks_please-1.yaml @@ -0,0 +1,293 @@ +--- +uuid: dc22245a-f2fb-4ce3-be5f-f2f350771acc +task_id: home1_us_smart_lock-lock_all_the_locks_please-1 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock all the locks please + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null + lock.rear_door_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Lock all the locks please + context: + id: 01KHJQXDKNZ0EMCS4VAS8SMA2Z + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:49.013355+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:49.017121+00:00 + - role: user + content: Lock all the locks please + attachments: null + created: 2026-02-16 08:08:49.013431+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17da333d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:49.017137+00:00 + duration_ms: 70.045 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_all_the_locks_please-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_all_the_locks_please-2.yaml new file mode 100644 index 000000000..66e1f4ef3 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_all_the_locks_please-2.yaml @@ -0,0 +1,293 @@ +--- +uuid: 38603f97-560f-435b-a015-20e8edaa259b +task_id: home1_us_smart_lock-lock_all_the_locks_please-2 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock all the locks please + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null + lock.rear_door_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Lock all the locks please + context: + id: 01KHJQXDYDGAXNY1BFY4Z9HS4V + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:49.357135+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:49.359558+00:00 + - role: user + content: Lock all the locks please + attachments: null + created: 2026-02-16 08:08:49.357209+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17db1ceb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:49.359572+00:00 + duration_ms: 147.532 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_all_the_locks_please-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_all_the_locks_please-3.yaml new file mode 100644 index 000000000..ac1267cae --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_all_the_locks_please-3.yaml @@ -0,0 +1,293 @@ +--- +uuid: 7d502613-d948-401d-b33b-79760bff1c0c +task_id: home1_us_smart_lock-lock_all_the_locks_please-3 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock all the locks please + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null + lock.rear_door_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Lock all the locks please + context: + id: 01KHJQXECKHX4TN8J8R1SR5HGP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:49.811390+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:49.813350+00:00 + - role: user + content: Lock all the locks please + attachments: null + created: 2026-02-16 08:08:49.811468+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16eb38720>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:49.813364+00:00 + duration_ms: 64.878 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_all_the_locks_please-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_all_the_locks_please-4.yaml new file mode 100644 index 000000000..b5e6c8e4b --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_all_the_locks_please-4.yaml @@ -0,0 +1,293 @@ +--- +uuid: c82876cc-8a9f-4efe-a497-646e35b12aee +task_id: home1_us_smart_lock-lock_all_the_locks_please-4 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock all the locks please + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null + lock.rear_door_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Lock all the locks please + context: + id: 01KHJQXEQAW7EWMS9BQN58QDCX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:50.154411+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:50.156312+00:00 + - role: user + content: Lock all the locks please + attachments: null + created: 2026-02-16 08:08:50.154488+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17de54040>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:50.156327+00:00 + duration_ms: 173.966 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_door-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_door-0.yaml new file mode 100644 index 000000000..0738c0f69 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_door-0.yaml @@ -0,0 +1,293 @@ +--- +uuid: ec266ec5-4ab0-4e35-8f50-e7c6868ae86a +task_id: home1_us_smart_lock-lock_the_door-0 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the door + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null + lock.rear_door_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Lock the door + context: + id: 01KHJQXGTZ19VAYBKRNZFTCE0J + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:52.319188+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:52.321036+00:00 + - role: user + content: Lock the door + attachments: null + created: 2026-02-16 08:08:52.319302+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ea2b480>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:52.321048+00:00 + duration_ms: 143.031 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_door-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_door-1.yaml new file mode 100644 index 000000000..0793fc90d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_door-1.yaml @@ -0,0 +1,293 @@ +--- +uuid: 8165a24b-009a-4e84-97ca-7c11bef3a625 +task_id: home1_us_smart_lock-lock_the_door-1 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the door + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null + lock.rear_door_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Lock the door + context: + id: 01KHJQXH7ED19W12MYPNMF1ENR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:52.718409+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:52.723754+00:00 + - role: user + content: Lock the door + attachments: null + created: 2026-02-16 08:08:52.718487+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d026b90>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:52.723770+00:00 + duration_ms: 66.237 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_door-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_door-2.yaml new file mode 100644 index 000000000..9f18a2e3d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_door-2.yaml @@ -0,0 +1,293 @@ +--- +uuid: 736563d1-a896-4de4-b087-1636891227db +task_id: home1_us_smart_lock-lock_the_door-2 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the door + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null + lock.rear_door_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Lock the door + context: + id: 01KHJQXHHRHDFS4GHJ0P9JKKF1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:53.049090+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:53.051011+00:00 + - role: user + content: Lock the door + attachments: null + created: 2026-02-16 08:08:53.049165+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d89ecf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:53.051026+00:00 + duration_ms: 61.743 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_door-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_door-3.yaml new file mode 100644 index 000000000..d4f98d19f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_door-3.yaml @@ -0,0 +1,293 @@ +--- +uuid: 7c8b077f-1a56-47bd-bc1b-25a87de0007d +task_id: home1_us_smart_lock-lock_the_door-3 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the door + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null + lock.rear_door_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Lock the door + context: + id: 01KHJQXHWKKK124A9XQQFZG5HJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:53.395515+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:53.398014+00:00 + - role: user + content: Lock the door + attachments: null + created: 2026-02-16 08:08:53.395595+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d315dd0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:53.398029+00:00 + duration_ms: 63.216 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_door-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_door-4.yaml new file mode 100644 index 000000000..ff908af39 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_door-4.yaml @@ -0,0 +1,293 @@ +--- +uuid: d5323e16-8273-4e08-b836-02d2089f6735 +task_id: home1_us_smart_lock-lock_the_door-4 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the door + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null + lock.rear_door_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Lock the door + context: + id: 01KHJQXJ6Q5RRKQFQ8T6JR0JZR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:53.719459+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:53.723171+00:00 + - role: user + content: Lock the door + attachments: null + created: 2026-02-16 08:08:53.719539+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d0ba090>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:53.723186+00:00 + duration_ms: 63.411 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_entry_lock-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_entry_lock-0.yaml new file mode 100644 index 000000000..bd7b5fc4e --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_entry_lock-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: 215fb392-1c99-4f45-93db-f09f81e3ab7c +task_id: home1_us_smart_lock-lock_the_entry_lock-0 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the entry lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the entry lock + context: + id: 01KHJQX65K3N0NXAVBE13EQCNP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:41.395461+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:41.397345+00:00 + - role: user + content: Lock the entry lock + attachments: null + created: 2026-02-16 08:08:41.395540+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc185dddf30>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:41.397358+00:00 + duration_ms: 142.944 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_entry_lock-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_entry_lock-1.yaml new file mode 100644 index 000000000..73fd8a405 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_entry_lock-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: 5b38df11-223d-4de7-98e4-e18d46c6a33d +task_id: home1_us_smart_lock-lock_the_entry_lock-1 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the entry lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the entry lock + context: + id: 01KHJQX6K6WSGMFP90SK3FJE9Q + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:41.830788+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:41.833213+00:00 + - role: user + content: Lock the entry lock + attachments: null + created: 2026-02-16 08:08:41.830863+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fefb5e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:41.833238+00:00 + duration_ms: 152.564 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_entry_lock-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_entry_lock-2.yaml new file mode 100644 index 000000000..47df78bf9 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_entry_lock-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: 589a12c3-6bc6-4f3c-abdf-e43da050a576 +task_id: home1_us_smart_lock-lock_the_entry_lock-2 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the entry lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the entry lock + context: + id: 01KHJQX736Z415ZYBE9YRS098H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:42.342156+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:42.344079+00:00 + - role: user + content: Lock the entry lock + attachments: null + created: 2026-02-16 08:08:42.342258+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ddb6cf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:42.344092+00:00 + duration_ms: 75.161 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_entry_lock-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_entry_lock-3.yaml new file mode 100644 index 000000000..ffff67915 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_entry_lock-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: ab8ff051-08cc-4178-8845-6b300add1fcd +task_id: home1_us_smart_lock-lock_the_entry_lock-3 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the entry lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the entry lock + context: + id: 01KHJQX7EK1ZXTE6592RF9H6A4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:42.707888+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:42.709850+00:00 + - role: user + content: Lock the entry lock + attachments: null + created: 2026-02-16 08:08:42.707966+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17ddef8a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:42.709865+00:00 + duration_ms: 64.83 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_entry_lock-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_entry_lock-4.yaml new file mode 100644 index 000000000..4d2327808 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_entry_lock-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: b364f78f-8556-42ea-bc79-62995647912d +task_id: home1_us_smart_lock-lock_the_entry_lock-4 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the entry lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the entry lock + context: + id: 01KHJQX7S0NDRM53N20W1NG49V + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:43.040818+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:43.046531+00:00 + - role: user + content: Lock the entry lock + attachments: null + created: 2026-02-16 08:08:43.040897+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d634880>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:43.046550+00:00 + duration_ms: 62.121 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_front_door_lock-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_front_door_lock-0.yaml new file mode 100644 index 000000000..7eab8271f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_front_door_lock-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: 473896ab-d27d-4e12-aff6-ef48fad73c63 +task_id: home1_us_smart_lock-lock_the_front_door_lock-0 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the front door lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the front door lock + context: + id: 01KHJQX4DXGPFEG88BZT0RZAW1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:39.614093+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:39.616045+00:00 + - role: user + content: Lock the front door lock + attachments: null + created: 2026-02-16 08:08:39.614168+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fe2fe20>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:39.616059+00:00 + duration_ms: 58.595 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_front_door_lock-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_front_door_lock-1.yaml new file mode 100644 index 000000000..9cd5c24c2 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_front_door_lock-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: 85df6c09-f02d-480d-a359-79cb473caab2 +task_id: home1_us_smart_lock-lock_the_front_door_lock-1 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the front door lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the front door lock + context: + id: 01KHJQX4R3MKEMB1NYQMQQ76Q5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:39.939907+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:39.943245+00:00 + - role: user + content: Lock the front door lock + attachments: null + created: 2026-02-16 08:08:39.939983+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d8f66c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:39.943262+00:00 + duration_ms: 145.578 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_front_door_lock-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_front_door_lock-2.yaml new file mode 100644 index 000000000..531dbe8be --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_front_door_lock-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: c0b70229-fd96-4ae0-9ce2-83274df34222 +task_id: home1_us_smart_lock-lock_the_front_door_lock-2 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the front door lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the front door lock + context: + id: 01KHJQX56B6PVTBJRBY3P3Q2H5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:40.395322+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:40.397215+00:00 + - role: user + content: Lock the front door lock + attachments: null + created: 2026-02-16 08:08:40.395401+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16de4d2d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:40.397241+00:00 + duration_ms: 66.659 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_front_door_lock-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_front_door_lock-3.yaml new file mode 100644 index 000000000..8a1643e15 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_front_door_lock-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: 26bc7c84-94a0-47b7-b123-ec3fcb0ba35b +task_id: home1_us_smart_lock-lock_the_front_door_lock-3 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the front door lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the front door lock + context: + id: 01KHJQX5GFPGFYV4JW9G8JVKCY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:40.719320+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:40.726084+00:00 + - role: user + content: Lock the front door lock + attachments: null + created: 2026-02-16 08:08:40.719398+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d359b10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:40.726101+00:00 + duration_ms: 70.447 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_front_door_lock-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_front_door_lock-4.yaml new file mode 100644 index 000000000..73ea4cafc --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_front_door_lock-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: 8224a30d-d3ec-47f7-b0e8-396c1cd111a6 +task_id: home1_us_smart_lock-lock_the_front_door_lock-4 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the front door lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the front door lock + context: + id: 01KHJQX5V5JP2PAED096SYKJ02 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:41.062068+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:41.064001+00:00 + - role: user + content: Lock the front door lock + attachments: null + created: 2026-02-16 08:08:41.062144+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16edb1e80>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:41.064015+00:00 + duration_ms: 63.165 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_lock-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_lock-0.yaml new file mode 100644 index 000000000..af5655d19 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_lock-0.yaml @@ -0,0 +1,293 @@ +--- +uuid: c26003e0-8b97-4022-9719-2d5b6eeb4612 +task_id: home1_us_smart_lock-lock_the_lock-0 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null + lock.rear_door_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Lock the lock + context: + id: 01KHJQXF4KA06K56ENP4SX77ZC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:50.579359+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:50.584808+00:00 + - role: user + content: Lock the lock + attachments: null + created: 2026-02-16 08:08:50.579437+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d50dfe0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:50.584823+00:00 + duration_ms: 63.112 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_lock-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_lock-1.yaml new file mode 100644 index 000000000..8091e888c --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_lock-1.yaml @@ -0,0 +1,293 @@ +--- +uuid: d5fd3957-9b56-4197-81b4-30b4cd851655 +task_id: home1_us_smart_lock-lock_the_lock-1 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null + lock.rear_door_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Lock the lock + context: + id: 01KHJQXFF9716DR30CHW7ZCHS4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:50.922077+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:50.924058+00:00 + - role: user + content: Lock the lock + attachments: null + created: 2026-02-16 08:08:50.922153+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dfd4300>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:50.924072+00:00 + duration_ms: 151.279 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_lock-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_lock-2.yaml new file mode 100644 index 000000000..fcd906ae0 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_lock-2.yaml @@ -0,0 +1,293 @@ +--- +uuid: 706dcf4c-be30-4777-b229-d760a6eb82e9 +task_id: home1_us_smart_lock-lock_the_lock-2 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null + lock.rear_door_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Lock the lock + context: + id: 01KHJQXFWFZC51QX10ZKK3J3AT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:51.343547+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:51.347376+00:00 + - role: user + content: Lock the lock + attachments: null + created: 2026-02-16 08:08:51.343623+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d984510>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:51.347391+00:00 + duration_ms: 64.673 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_lock-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_lock-3.yaml new file mode 100644 index 000000000..68cfb4a86 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_lock-3.yaml @@ -0,0 +1,293 @@ +--- +uuid: 9945b970-531e-4dfe-a20d-7f5c4fa8fc00 +task_id: home1_us_smart_lock-lock_the_lock-3 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null + lock.rear_door_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Lock the lock + context: + id: 01KHJQXG7NNATV9CRC19BWTW0H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:51.701786+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:51.713595+00:00 + - role: user + content: Lock the lock + attachments: null + created: 2026-02-16 08:08:51.701863+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d3171c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:51.713614+00:00 + duration_ms: 67.969 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_lock-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_lock-4.yaml new file mode 100644 index 000000000..e8c8dde63 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_lock-4.yaml @@ -0,0 +1,293 @@ +--- +uuid: ec696082-75f4-4d94-9852-cbe8b684ec86 +task_id: home1_us_smart_lock-lock_the_lock-4 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null + lock.rear_door_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Lock the lock + context: + id: 01KHJQXGHJG2RM266AF8SGCZWR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:52.018911+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:52.020865+00:00 + - role: user + content: Lock the lock + attachments: null + created: 2026-02-16 08:08:52.018991+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fe4aae0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:52.020879+00:00 + duration_ms: 66.741 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_smart_lock-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_smart_lock-0.yaml new file mode 100644 index 000000000..f6a977a85 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_smart_lock-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: 07641cca-9160-4a37-bfea-986026a40ee0 +task_id: home1_us_smart_lock-lock_the_smart_lock-0 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the smart lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the smart lock + context: + id: 01KHJQX8454FNAA3GDF55Y3W73 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:43.397332+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:43.399179+00:00 + - role: user + content: Lock the smart lock + attachments: null + created: 2026-02-16 08:08:43.397409+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17daef5e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:43.399192+00:00 + duration_ms: 64.81 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_smart_lock-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_smart_lock-1.yaml new file mode 100644 index 000000000..6286ad5ad --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_smart_lock-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: 51c0a49f-dd85-4e11-9665-132375d4b49a +task_id: home1_us_smart_lock-lock_the_smart_lock-1 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the smart lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the smart lock + context: + id: 01KHJQX8EH7GVNX6NKD8ZYFYG5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:43.729247+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:43.732980+00:00 + - role: user + content: Lock the smart lock + attachments: null + created: 2026-02-16 08:08:43.729328+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fe3a140>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:43.732995+00:00 + duration_ms: 66.348 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_smart_lock-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_smart_lock-2.yaml new file mode 100644 index 000000000..279760a97 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_smart_lock-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: 0fe3b317-927a-445e-a0ad-f51a91ff2eb9 +task_id: home1_us_smart_lock-lock_the_smart_lock-2 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the smart lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the smart lock + context: + id: 01KHJQX8SSXVXEDTAGKYX5X75T + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:44.089643+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:44.091599+00:00 + - role: user + content: Lock the smart lock + attachments: null + created: 2026-02-16 08:08:44.089718+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d5185c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:44.091612+00:00 + duration_ms: 59.673 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_smart_lock-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_smart_lock-3.yaml new file mode 100644 index 000000000..c4f301908 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_smart_lock-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: f64551fc-3047-4e84-aa08-943ad420d0fb +task_id: home1_us_smart_lock-lock_the_smart_lock-3 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the smart lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the smart lock + context: + id: 01KHJQX9462GXHJ47H8R42FMNF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:44.422573+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:44.424453+00:00 + - role: user + content: Lock the smart lock + attachments: null + created: 2026-02-16 08:08:44.422647+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fe197a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:44.424466+00:00 + duration_ms: 59.932 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_smart_lock-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_smart_lock-4.yaml new file mode 100644 index 000000000..a94a0ccd9 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-lock_the_smart_lock-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: 67282d07-a90c-4069-ad73-0a8441343e67 +task_id: home1_us_smart_lock-lock_the_smart_lock-4 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Lock the smart lock + expect_changes: + lock.smart_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked + conversation_trace: + - event_type: async_process + data: + text: Lock the smart lock + context: + id: 01KHJQX9EB962A9E7F7H1M9SQ7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:44.747767+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:44.750098+00:00 + - role: user + content: Lock the smart lock + attachments: null + created: 2026-02-16 08:08:44.747844+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ec614e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:44.750113+00:00 + duration_ms: 73.761 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_all_the_doors-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_all_the_doors-0.yaml new file mode 100644 index 000000000..c076521e3 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_all_the_doors-0.yaml @@ -0,0 +1,293 @@ +--- +uuid: 39303167-b355-479e-9f92-288a63d7eef6 +task_id: home1_us_smart_lock-unlock_all_the_doors-0 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock all the doors + expect_changes: + lock.smart_lock: + state: locked + attributes: null + lock.rear_door_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Unlock all the doors + context: + id: 01KHJQXJJ401YEGPQBECHTE7RB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:54.084736+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:54.086718+00:00 + - role: user + content: Unlock all the doors + attachments: null + created: 2026-02-16 08:08:54.084813+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c7aeae0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:54.086733+00:00 + duration_ms: 63.064 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_all_the_doors-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_all_the_doors-1.yaml new file mode 100644 index 000000000..c63b1d761 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_all_the_doors-1.yaml @@ -0,0 +1,293 @@ +--- +uuid: c92edaa1-f4c2-4093-b8cc-200ec616c072 +task_id: home1_us_smart_lock-unlock_all_the_doors-1 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock all the doors + expect_changes: + lock.smart_lock: + state: locked + attributes: null + lock.rear_door_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Unlock all the doors + context: + id: 01KHJQXJWBA0WMCFCFV697QYBY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:54.411688+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:54.413574+00:00 + - role: user + content: Unlock all the doors + attachments: null + created: 2026-02-16 08:08:54.411763+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d1b4eb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:54.413590+00:00 + duration_ms: 61.797 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_all_the_doors-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_all_the_doors-2.yaml new file mode 100644 index 000000000..d3e0bf38b --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_all_the_doors-2.yaml @@ -0,0 +1,293 @@ +--- +uuid: 1e0c9697-33c2-4cd1-815a-31c81a26c4e7 +task_id: home1_us_smart_lock-unlock_all_the_doors-2 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock all the doors + expect_changes: + lock.smart_lock: + state: locked + attributes: null + lock.rear_door_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Unlock all the doors + context: + id: 01KHJQXK6A4WJGMS605YKTNWH6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:54.730387+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:54.732186+00:00 + - role: user + content: Unlock all the doors + attachments: null + created: 2026-02-16 08:08:54.730466+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e80ce00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:54.732200+00:00 + duration_ms: 143.213 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_all_the_doors-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_all_the_doors-3.yaml new file mode 100644 index 000000000..c1b935871 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_all_the_doors-3.yaml @@ -0,0 +1,293 @@ +--- +uuid: 24518b97-bb02-4aa7-9443-7e4f00d402f2 +task_id: home1_us_smart_lock-unlock_all_the_doors-3 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock all the doors + expect_changes: + lock.smart_lock: + state: locked + attributes: null + lock.rear_door_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Unlock all the doors + context: + id: 01KHJQXKKCTFEQWVSRXENQ2XB2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:55.148922+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:55.151275+00:00 + - role: user + content: Unlock all the doors + attachments: null + created: 2026-02-16 08:08:55.148999+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16dd65010>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:55.151289+00:00 + duration_ms: 150.825 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_all_the_doors-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_all_the_doors-4.yaml new file mode 100644 index 000000000..03cfe9527 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_all_the_doors-4.yaml @@ -0,0 +1,293 @@ +--- +uuid: 30a8e651-e8a4-4a46-9c26-06740599c0bd +task_id: home1_us_smart_lock-unlock_all_the_doors-4 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock all the doors + expect_changes: + lock.smart_lock: + state: locked + attributes: null + lock.rear_door_lock: + state: locked + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Unlock all the doors + context: + id: 01KHJQXM08HEP1JM8CQT8M9BSS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:55.560651+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:55.567457+00:00 + - role: user + content: Unlock all the doors + attachments: null + created: 2026-02-16 08:08:55.560726+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16de4f110>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:55.567475+00:00 + duration_ms: 147.892 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_entry_lock-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_entry_lock-0.yaml new file mode 100644 index 000000000..75b60461d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_entry_lock-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: b23c3950-3338-4a5c-8345-a59cc26b5adb +task_id: home1_us_smart_lock-unlock_the_entry_lock-0 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock the entry lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock the entry lock + context: + id: 01KHJQX9S4HGC0HSDQWZRQB9RE + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:45.092713+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:45.097851+00:00 + - role: user + content: Unlock the entry lock + attachments: null + created: 2026-02-16 08:08:45.092789+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16cbc0e00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:45.097867+00:00 + duration_ms: 63.445 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_entry_lock-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_entry_lock-1.yaml new file mode 100644 index 000000000..609b92273 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_entry_lock-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: b7dd72b5-f093-4e59-81d2-5098c0ecd7ee +task_id: home1_us_smart_lock-unlock_the_entry_lock-1 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock the entry lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock the entry lock + context: + id: 01KHJQXA3N7N4K5J3RXM0RRSDF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:45.429396+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:45.431806+00:00 + - role: user + content: Unlock the entry lock + attachments: null + created: 2026-02-16 08:08:45.429471+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dfcab90>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:45.431820+00:00 + duration_ms: 63.201 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_entry_lock-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_entry_lock-2.yaml new file mode 100644 index 000000000..ca62199d7 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_entry_lock-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: 6de41c45-8300-464f-a602-26b5b3852c2d +task_id: home1_us_smart_lock-unlock_the_entry_lock-2 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock the entry lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock the entry lock + context: + id: 01KHJQXADJH33Q9N0NJRMEANM5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:45.746447+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:45.750064+00:00 + - role: user + content: Unlock the entry lock + attachments: null + created: 2026-02-16 08:08:45.746545+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc185906400>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:45.750079+00:00 + duration_ms: 60.222 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_entry_lock-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_entry_lock-3.yaml new file mode 100644 index 000000000..b06964e7d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_entry_lock-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: 6c683e7b-a083-470b-bacc-aa9a54771dd2 +task_id: home1_us_smart_lock-unlock_the_entry_lock-3 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock the entry lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock the entry lock + context: + id: 01KHJQXAS43K6RY9YJFBVHVGRC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:46.116546+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:46.118432+00:00 + - role: user + content: Unlock the entry lock + attachments: null + created: 2026-02-16 08:08:46.116621+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c9b5bc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:46.118447+00:00 + duration_ms: 147.815 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_entry_lock-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_entry_lock-4.yaml new file mode 100644 index 000000000..d3aaed57e --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_entry_lock-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: 2c2b8926-6832-4d96-9ad5-4830bec10986 +task_id: home1_us_smart_lock-unlock_the_entry_lock-4 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock the entry lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock the entry lock + context: + id: 01KHJQXB5YF5RCD2QMF3864QZ2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:46.526479+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:46.528810+00:00 + - role: user + content: Unlock the entry lock + attachments: null + created: 2026-02-16 08:08:46.526556+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d7cd9b0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:46.528825+00:00 + duration_ms: 61.04 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_smart_lock-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_smart_lock-0.yaml new file mode 100644 index 000000000..1e0cf4b54 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_smart_lock-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: b059cead-8c6c-4a8d-b22f-1ab3f30e0346 +task_id: home1_us_smart_lock-unlock_the_smart_lock-0 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock the smart lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock the smart lock + context: + id: 01KHJQXBG02VMY5749ENWRMSP0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:46.848352+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:46.856452+00:00 + - role: user + content: Unlock the smart lock + attachments: null + created: 2026-02-16 08:08:46.848431+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16eb15170>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:46.856471+00:00 + duration_ms: 69.136 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_smart_lock-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_smart_lock-1.yaml new file mode 100644 index 000000000..13e2fe565 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_smart_lock-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: bfd8a058-3cce-4621-b1c4-a5d8caf10fd6 +task_id: home1_us_smart_lock-unlock_the_smart_lock-1 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock the smart lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock the smart lock + context: + id: 01KHJQXBTRA161T3EYAQ03QXDR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:47.192739+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:47.196820+00:00 + - role: user + content: Unlock the smart lock + attachments: null + created: 2026-02-16 08:08:47.192812+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16deef270>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:47.196837+00:00 + duration_ms: 150.353 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_smart_lock-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_smart_lock-2.yaml new file mode 100644 index 000000000..cffbfbb70 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_smart_lock-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: 6f622051-c5fc-42c4-8308-b81a446b326b +task_id: home1_us_smart_lock-unlock_the_smart_lock-2 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock the smart lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock the smart lock + context: + id: 01KHJQXC80J02MSXS7N6P5WBQQ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:47.616352+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:47.620767+00:00 + - role: user + content: Unlock the smart lock + attachments: null + created: 2026-02-16 08:08:47.616428+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d12efb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:47.620784+00:00 + duration_ms: 68.501 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_smart_lock-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_smart_lock-3.yaml new file mode 100644 index 000000000..4356b5405 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_smart_lock-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: a7d5b195-66f6-4265-ba6d-a9cf602dc6c8 +task_id: home1_us_smart_lock-unlock_the_smart_lock-3 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock the smart lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock the smart lock + context: + id: 01KHJQXCKF8JH74ZW3STQQXXYS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:47.983803+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:47.985732+00:00 + - role: user + content: Unlock the smart lock + attachments: null + created: 2026-02-16 08:08:47.983877+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17da79d20>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:47.985746+00:00 + duration_ms: 60.418 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_smart_lock-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_smart_lock-4.yaml new file mode 100644 index 000000000..e25760d12 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_smart_lock-unlock_the_smart_lock-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: 8ce435b0-9e73-4e8e-891c-f0cd47958997 +task_id: home1_us_smart_lock-unlock_the_smart_lock-4 +model_id: gemma-3-27b-it +category: lock +task: + input_text: Unlock the smart lock + expect_changes: + lock.smart_lock: + state: unlocked + attributes: null +response: Error talking to API +context: + unexpected_states: + lock.smart_lock: + expected: + state: unlocked + got: + state: locked + conversation_trace: + - event_type: async_process + data: + text: Unlock the smart lock + context: + id: 01KHJQXCXWYZ4B1EPACSPB2VJ5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:48.316530+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:48.323302+00:00 + - role: user + content: Unlock the smart lock + attachments: null + created: 2026-02-16 08:08:48.316608+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c5ca140>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:48.323320+00:00 + duration_ms: 67.99 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-clean_the_living_room-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-clean_the_living_room-0.yaml new file mode 100644 index 000000000..2939059ff --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-clean_the_living_room-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: 5d7e563b-90e8-4aba-9573-416ddaf1bb95 +task_id: home1_us_vacuum-clean_the_living_room-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Clean the living room + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Clean the living room + context: + id: 01KHJQXXDT0Y4H66FQE8EEPYCS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:05.211012+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:05.214782+00:00 + - role: user + content: Clean the living room + attachments: null + created: 2026-02-16 08:09:05.211089+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d60bc10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:05.214798+00:00 + duration_ms: 144.981 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-clean_the_living_room-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-clean_the_living_room-1.yaml new file mode 100644 index 000000000..7f82ac0fe --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-clean_the_living_room-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: 51c1c2ab-d729-4e0a-982d-c634ced3f38d +task_id: home1_us_vacuum-clean_the_living_room-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Clean the living room + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Clean the living room + context: + id: 01KHJQXXTNY6VBP4N96Q7C7JMP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:05.621832+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:05.625240+00:00 + - role: user + content: Clean the living room + attachments: null + created: 2026-02-16 08:09:05.621909+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16de4d7a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:05.625255+00:00 + duration_ms: 177.022 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-clean_the_living_room-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-clean_the_living_room-2.yaml new file mode 100644 index 000000000..66d941286 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-clean_the_living_room-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: f828549d-8a85-456e-bd09-2a406f80f52a +task_id: home1_us_vacuum-clean_the_living_room-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Clean the living room + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Clean the living room + context: + id: 01KHJQXY8S9GX3GXCC1FQWZJVV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:06.073678+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:06.075595+00:00 + - role: user + content: Clean the living room + attachments: null + created: 2026-02-16 08:09:06.073752+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d6ac510>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:06.075608+00:00 + duration_ms: 61.086 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-clean_the_living_room-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-clean_the_living_room-3.yaml new file mode 100644 index 000000000..c41609ecb --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-clean_the_living_room-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: b788a292-e57d-4995-aa8d-f34233a80403 +task_id: home1_us_vacuum-clean_the_living_room-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Clean the living room + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Clean the living room + context: + id: 01KHJQXYK25Q8HFG68AQPBW9YM + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:06.402409+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:06.407383+00:00 + - role: user + content: Clean the living room + attachments: null + created: 2026-02-16 08:09:06.402485+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ff0cb40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:06.407396+00:00 + duration_ms: 69.344 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-clean_the_living_room-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-clean_the_living_room-4.yaml new file mode 100644 index 000000000..91e001c76 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-clean_the_living_room-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: 5b7a2522-e903-406b-8d80-8cde744c1b0c +task_id: home1_us_vacuum-clean_the_living_room-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Clean the living room + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Clean the living room + context: + id: 01KHJQXYXT1ESY8G6FQVHMWK91 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:06.746319+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:06.748211+00:00 + - role: user + content: Clean the living room + attachments: null + created: 2026-02-16 08:09:06.746398+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d23c460>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:06.748236+00:00 + duration_ms: 141.317 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-please_start_cleaning-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-please_start_cleaning-0.yaml new file mode 100644 index 000000000..a6579dc07 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-please_start_cleaning-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: a6b68354-7f57-4263-878a-5dbcf7c22c6c +task_id: home1_us_vacuum-please_start_cleaning-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Please start cleaning + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Please start cleaning + context: + id: 01KHJQY1NBK9FZNS1FVPMKNZRP + parent_id: null + user_id: null + conversation_id: null + device_id: 19bfebe6ae32cccb3c2163ef913cde88 + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:09.547150+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:09.548998+00:00 + - role: user + content: Please start cleaning + attachments: null + created: 2026-02-16 08:09:09.547252+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e98eae0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:09.549010+00:00 + duration_ms: 62.251 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-please_start_cleaning-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-please_start_cleaning-1.yaml new file mode 100644 index 000000000..6f3f4a751 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-please_start_cleaning-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: 68d22753-65dd-41f2-a53f-0f54abb80151 +task_id: home1_us_vacuum-please_start_cleaning-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Please start cleaning + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Please start cleaning + context: + id: 01KHJQY1ZPFHCC1KAA7HENC331 + parent_id: null + user_id: null + conversation_id: null + device_id: f2148561788fc45363eb4b2bc3ed9bc7 + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:09.878932+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:09.880880+00:00 + - role: user + content: Please start cleaning + attachments: null + created: 2026-02-16 08:09:09.879007+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d7c1900>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:09.880892+00:00 + duration_ms: 62.281 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-please_start_cleaning-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-please_start_cleaning-2.yaml new file mode 100644 index 000000000..9a2e69592 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-please_start_cleaning-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: 44ffccb8-6dad-4618-bbc3-f9f076daf928 +task_id: home1_us_vacuum-please_start_cleaning-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Please start cleaning + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Please start cleaning + context: + id: 01KHJQY2A4VZ9Z2A5R0Q3JYXDT + parent_id: null + user_id: null + conversation_id: null + device_id: 8a70e07cb8073ca24c7f4ba173eb8394 + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:10.212561+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:10.215752+00:00 + - role: user + content: Please start cleaning + attachments: null + created: 2026-02-16 08:09:10.212637+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c8abd70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:10.215766+00:00 + duration_ms: 61.655 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-please_start_cleaning-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-please_start_cleaning-3.yaml new file mode 100644 index 000000000..6242e5c7a --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-please_start_cleaning-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: 2d864d07-b7d8-47d2-9838-e4b401a9d18c +task_id: home1_us_vacuum-please_start_cleaning-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Please start cleaning + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Please start cleaning + context: + id: 01KHJQY2NFA9TC4X396JW79G22 + parent_id: null + user_id: null + conversation_id: null + device_id: be8cd87eb39a7e8504656a0a47782e82 + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:10.575841+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:10.577790+00:00 + - role: user + content: Please start cleaning + attachments: null + created: 2026-02-16 08:09:10.575915+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16eb05a60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:10.577804+00:00 + duration_ms: 61.254 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-please_start_cleaning-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-please_start_cleaning-4.yaml new file mode 100644 index 000000000..dcc744940 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-please_start_cleaning-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: 9a432bf1-109c-4f34-9c0d-83b30be13cd5 +task_id: home1_us_vacuum-please_start_cleaning-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Please start cleaning + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Please start cleaning + context: + id: 01KHJQY31821ERF6EDZ67YN96P + parent_id: null + user_id: null + conversation_id: null + device_id: 6f3afbd321d98d4b6ba9abf9483da197 + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:10.952766+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:10.954700+00:00 + - role: user + content: Please start cleaning + attachments: null + created: 2026-02-16 08:09:10.952848+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d9261f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:10.954713+00:00 + duration_ms: 70.084 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-return_roborock_downstairs_to_base-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-return_roborock_downstairs_to_base-0.yaml new file mode 100644 index 000000000..8436d1e7d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-return_roborock_downstairs_to_base-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: 496adb19-5442-4dcd-88e6-26262ffc278f +task_id: home1_us_vacuum-return_roborock_downstairs_to_base-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Return Roborock Downstairs to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Return Roborock Downstairs to base + context: + id: 01KHJQY5909Z0FXYERA25KE30V + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:13.248997+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:13.250843+00:00 + - role: user + content: Return Roborock Downstairs to base + attachments: null + created: 2026-02-16 08:09:13.249074+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fe2fd70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:13.250856+00:00 + duration_ms: 60.285 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-return_roborock_downstairs_to_base-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-return_roborock_downstairs_to_base-1.yaml new file mode 100644 index 000000000..c6c757dbb --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-return_roborock_downstairs_to_base-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: 1c3bf62f-44c9-4841-b04b-1e5175f5a0a4 +task_id: home1_us_vacuum-return_roborock_downstairs_to_base-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Return Roborock Downstairs to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Return Roborock Downstairs to base + context: + id: 01KHJQY5K1VB40DYS9WAS2TMJ4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:13.569458+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:13.571247+00:00 + - role: user + content: Return Roborock Downstairs to base + attachments: null + created: 2026-02-16 08:09:13.569536+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ecbe350>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:13.571260+00:00 + duration_ms: 58.034 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-return_roborock_downstairs_to_base-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-return_roborock_downstairs_to_base-2.yaml new file mode 100644 index 000000000..e48444c1d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-return_roborock_downstairs_to_base-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: 540fb68c-855b-420a-a6c1-95acfaa0c8d1 +task_id: home1_us_vacuum-return_roborock_downstairs_to_base-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Return Roborock Downstairs to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Return Roborock Downstairs to base + context: + id: 01KHJQY5XBTYJYDYEV5SDZ4JSS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:13.899211+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:13.901464+00:00 + - role: user + content: Return Roborock Downstairs to base + attachments: null + created: 2026-02-16 08:09:13.899312+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c5dc670>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:13.901477+00:00 + duration_ms: 62.689 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-return_roborock_downstairs_to_base-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-return_roborock_downstairs_to_base-3.yaml new file mode 100644 index 000000000..d0e5a1328 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-return_roborock_downstairs_to_base-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: 94de2f4e-8831-4511-94a2-4a84fba3d4e3 +task_id: home1_us_vacuum-return_roborock_downstairs_to_base-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Return Roborock Downstairs to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Return Roborock Downstairs to base + context: + id: 01KHJQY67BZPW9JE4YEE98DKQ7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:14.219908+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:14.223455+00:00 + - role: user + content: Return Roborock Downstairs to base + attachments: null + created: 2026-02-16 08:09:14.219984+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fd1e140>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:14.223469+00:00 + duration_ms: 63.588 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-return_roborock_downstairs_to_base-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-return_roborock_downstairs_to_base-4.yaml new file mode 100644 index 000000000..e5c33aebd --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-return_roborock_downstairs_to_base-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: 7f79867c-f2a0-4896-9d3e-dc8dd369dbe0 +task_id: home1_us_vacuum-return_roborock_downstairs_to_base-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Return Roborock Downstairs to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Return Roborock Downstairs to base + context: + id: 01KHJQY6HP10BV0KHD2N3H44ES + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:14.550517+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:14.554031+00:00 + - role: user + content: Return Roborock Downstairs to base + attachments: null + created: 2026-02-16 08:09:14.550590+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c526cf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:14.554045+00:00 + duration_ms: 162.662 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_downstairs_return_to_base-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_downstairs_return_to_base-0.yaml new file mode 100644 index 000000000..aa8d5179b --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_downstairs_return_to_base-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: 68158148-9a4b-4058-8be4-52679bacb5c6 +task_id: home1_us_vacuum-roborock_downstairs_return_to_base-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Roborock Downstairs return to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Roborock Downstairs return to base + context: + id: 01KHJQY70BQ7K9NVRQPHD2CQR5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:15.019282+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:15.021150+00:00 + - role: user + content: Roborock Downstairs return to base + attachments: null + created: 2026-02-16 08:09:15.019359+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16dda5380>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:15.021162+00:00 + duration_ms: 192.701 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_downstairs_return_to_base-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_downstairs_return_to_base-1.yaml new file mode 100644 index 000000000..b99910f46 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_downstairs_return_to_base-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: d735c437-751d-4824-be69-aeeaa93963ed +task_id: home1_us_vacuum-roborock_downstairs_return_to_base-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Roborock Downstairs return to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Roborock Downstairs return to base + context: + id: 01KHJQY7E569Y80WK6HDBTJ6SZ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:15.461887+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:15.468835+00:00 + - role: user + content: Roborock Downstairs return to base + attachments: null + created: 2026-02-16 08:09:15.461962+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fe2f320>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:15.468851+00:00 + duration_ms: 69.672 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_downstairs_return_to_base-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_downstairs_return_to_base-2.yaml new file mode 100644 index 000000000..62d8f1f7e --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_downstairs_return_to_base-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: f188a107-5c77-4868-b46d-42a0f1398569 +task_id: home1_us_vacuum-roborock_downstairs_return_to_base-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Roborock Downstairs return to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Roborock Downstairs return to base + context: + id: 01KHJQY7RFPBQTMVDP3HKE268S + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:15.791759+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:15.798359+00:00 + - role: user + content: Roborock Downstairs return to base + attachments: null + created: 2026-02-16 08:09:15.791832+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ff46c40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:15.798375+00:00 + duration_ms: 154.418 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_downstairs_return_to_base-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_downstairs_return_to_base-3.yaml new file mode 100644 index 000000000..92e5ab745 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_downstairs_return_to_base-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: 04b414c3-5e43-41c3-95bf-a21b55a6ac37 +task_id: home1_us_vacuum-roborock_downstairs_return_to_base-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Roborock Downstairs return to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Roborock Downstairs return to base + context: + id: 01KHJQY85WEFNZ26CM8MK27BRT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:16.220343+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:16.222239+00:00 + - role: user + content: Roborock Downstairs return to base + attachments: null + created: 2026-02-16 08:09:16.220420+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d60b320>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:16.222253+00:00 + duration_ms: 71.769 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_downstairs_return_to_base-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_downstairs_return_to_base-4.yaml new file mode 100644 index 000000000..c4a085ffa --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_downstairs_return_to_base-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: 7269884f-1b63-4057-92a6-8bbdbb5c8f37 +task_id: home1_us_vacuum-roborock_downstairs_return_to_base-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Roborock Downstairs return to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Roborock Downstairs return to base + context: + id: 01KHJQY8GH2T6MMSCWSRYY5EVF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:16.561461+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:16.565190+00:00 + - role: user + content: Roborock Downstairs return to base + attachments: null + created: 2026-02-16 08:09:16.561538+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fd077f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:16.565204+00:00 + duration_ms: 61.855 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_return_to_base-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_return_to_base-0.yaml new file mode 100644 index 000000000..ba5288da0 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_return_to_base-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: dfb649ca-f406-44e0-89bd-f53d94349aa2 +task_id: home1_us_vacuum-roborock_return_to_base-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Roborock return to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Roborock return to base + context: + id: 01KHJQY8VP1NWWQZ8S5D0NGCHV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:16.918806+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:16.922189+00:00 + - role: user + content: Roborock return to base + attachments: null + created: 2026-02-16 08:09:16.918880+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ea99b10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:16.922204+00:00 + duration_ms: 94.351 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_return_to_base-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_return_to_base-1.yaml new file mode 100644 index 000000000..0f98f2854 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_return_to_base-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: 639e1a1c-070a-47bc-8d00-1672f5915c43 +task_id: home1_us_vacuum-roborock_return_to_base-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Roborock return to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Roborock return to base + context: + id: 01KHJQY97BFEG08775533A5905 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:17.292102+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:17.294000+00:00 + - role: user + content: Roborock return to base + attachments: null + created: 2026-02-16 08:09:17.292177+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d4b1f30>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:17.294014+00:00 + duration_ms: 60.477 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_return_to_base-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_return_to_base-2.yaml new file mode 100644 index 000000000..297c28cbb --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_return_to_base-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: b4f5dd38-df55-4fc8-b862-911cbd2fa9fe +task_id: home1_us_vacuum-roborock_return_to_base-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Roborock return to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Roborock return to base + context: + id: 01KHJQY9GW82KPTSXHYDNHYTRM + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:17.597051+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:17.602048+00:00 + - role: user + content: Roborock return to base + attachments: null + created: 2026-02-16 08:09:17.597127+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fef9220>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:17.602063+00:00 + duration_ms: 59.664 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_return_to_base-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_return_to_base-3.yaml new file mode 100644 index 000000000..03725d196 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_return_to_base-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: bb1df5e7-9655-418f-9fd4-2463c44620e1 +task_id: home1_us_vacuum-roborock_return_to_base-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Roborock return to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Roborock return to base + context: + id: 01KHJQY9V3JEFFSJ30SN40V79H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:17.923362+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:17.925296+00:00 + - role: user + content: Roborock return to base + attachments: null + created: 2026-02-16 08:09:17.923436+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d848460>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:17.925309+00:00 + duration_ms: 57.963 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_return_to_base-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_return_to_base-4.yaml new file mode 100644 index 000000000..66b813877 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-roborock_return_to_base-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: e02b3153-6ee1-47a0-933c-c6d8f78fc343 +task_id: home1_us_vacuum-roborock_return_to_base-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Roborock return to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Roborock return to base + context: + id: 01KHJQYA4XJP4K3XZT9S8F82X7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:18.237119+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:18.240766+00:00 + - role: user + content: Roborock return to base + attachments: null + created: 2026-02-16 08:09:18.237194+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d34e2a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:18.240780+00:00 + duration_ms: 61.649 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock-0.yaml new file mode 100644 index 000000000..2e4293623 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: cf2b8174-4c3b-4d2f-bbba-1194934a8d75 +task_id: home1_us_vacuum-start_roborock-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock + context: + id: 01KHJQXQZNN4KVBT3R8SD47JAC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:59.637157+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:59.639089+00:00 + - role: user + content: Start Roborock + attachments: null + created: 2026-02-16 08:08:59.637258+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e9aee50>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:59.639101+00:00 + duration_ms: 145.042 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock-1.yaml new file mode 100644 index 000000000..1352b8aaa --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: c6a65559-61b3-48c3-9a42-54d2330de4c5 +task_id: home1_us_vacuum-start_roborock-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock + context: + id: 01KHJQXRC226M3T1ZPZERRJHWR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:00.034596+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:00.036895+00:00 + - role: user + content: Start Roborock + attachments: null + created: 2026-02-16 08:09:00.034669+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d23f690>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:00.036910+00:00 + duration_ms: 59.564 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock-2.yaml new file mode 100644 index 000000000..4d0dc0a64 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: a9002fe8-126b-4b52-9194-a1baeb563f2e +task_id: home1_us_vacuum-start_roborock-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock + context: + id: 01KHJQXRP38H099F11QY0GCSSG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:00.355117+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:00.358852+00:00 + - role: user + content: Start Roborock + attachments: null + created: 2026-02-16 08:09:00.355194+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dc3fcc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:00.358868+00:00 + duration_ms: 62.737 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock-3.yaml new file mode 100644 index 000000000..484b99c22 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: 74984ec4-72f7-4e94-aabd-ad28fcee9b8e +task_id: home1_us_vacuum-start_roborock-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock + context: + id: 01KHJQXS0DACBARPFCRTM49A1P + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:00.685345+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:00.693302+00:00 + - role: user + content: Start Roborock + attachments: null + created: 2026-02-16 08:09:00.685424+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16eb63320>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:00.693320+00:00 + duration_ms: 67.971 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock-4.yaml new file mode 100644 index 000000000..de9489330 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: a2806372-54dc-4b66-969f-f2aa4779a782 +task_id: home1_us_vacuum-start_roborock-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock + context: + id: 01KHJQXSAY0KN86V5K1X8Q68MJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:01.023060+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:01.026720+00:00 + - role: user + content: Start Roborock + attachments: null + created: 2026-02-16 08:09:01.023136+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d2c8a90>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:01.026734+00:00 + duration_ms: 147.97 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs-0.yaml new file mode 100644 index 000000000..6ed1651b8 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: d8062113-fcd6-49b9-86d1-70710bffad3a +task_id: home1_us_vacuum-start_roborock_downstairs-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock Downstairs + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock Downstairs + context: + id: 01KHJQXMKJ5027430CASR0C2KD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:56.178585+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:56.180473+00:00 + - role: user + content: Start Roborock Downstairs + attachments: null + created: 2026-02-16 08:08:56.178664+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16dea0e00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:56.180488+00:00 + duration_ms: 148.728 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs-1.yaml new file mode 100644 index 000000000..868964c14 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: f360beed-7c78-4e82-881c-9b03a05daadb +task_id: home1_us_vacuum-start_roborock_downstairs-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock Downstairs + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock Downstairs + context: + id: 01KHJQXN17MWDAJJFBY33BKP16 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:56.615967+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:56.619745+00:00 + - role: user + content: Start Roborock Downstairs + attachments: null + created: 2026-02-16 08:08:56.616043+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d8650c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:56.619760+00:00 + duration_ms: 69.771 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs-2.yaml new file mode 100644 index 000000000..6f836e750 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: 606268a2-26ec-446a-95d8-903c4b5215e9 +task_id: home1_us_vacuum-start_roborock_downstairs-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock Downstairs + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock Downstairs + context: + id: 01KHJQXNCGG1HCXBX9TJSPMTHX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:56.976970+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:56.988985+00:00 + - role: user + content: Start Roborock Downstairs + attachments: null + created: 2026-02-16 08:08:56.977051+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d5b6820>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:56.989004+00:00 + duration_ms: 69.604 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs-3.yaml new file mode 100644 index 000000000..c0fa81e30 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: c1eabc2a-98a8-4147-8616-a8384ad6bbd8 +task_id: home1_us_vacuum-start_roborock_downstairs-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock Downstairs + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock Downstairs + context: + id: 01KHJQXNP8YGXV0GMYVY5E9H92 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:57.288384+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:57.290258+00:00 + - role: user + content: Start Roborock Downstairs + attachments: null + created: 2026-02-16 08:08:57.288458+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d68d430>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:57.290272+00:00 + duration_ms: 59.216 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs-4.yaml new file mode 100644 index 000000000..bf5ccae37 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: 391a84cc-774d-4c4f-b999-a1bb93f7943e +task_id: home1_us_vacuum-start_roborock_downstairs-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock Downstairs + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock Downstairs + context: + id: 01KHJQXP0G8G84ZDEY5FW15ZJG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:57.616480+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:57.618308+00:00 + - role: user + content: Start Roborock Downstairs + attachments: null + created: 2026-02-16 08:08:57.616559+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d84b7f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:57.618320+00:00 + duration_ms: 63.945 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs_vacuum-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs_vacuum-0.yaml new file mode 100644 index 000000000..1a2a06755 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs_vacuum-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: fe5bad79-9983-4790-8256-584ef37684fb +task_id: home1_us_vacuum-start_roborock_downstairs_vacuum-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock Downstairs vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock Downstairs vacuum + context: + id: 01KHJQXPANHE84P9FDP0JFTSKR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:57.941402+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:57.946990+00:00 + - role: user + content: Start Roborock Downstairs vacuum + attachments: null + created: 2026-02-16 08:08:57.941481+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d89d4e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:57.947007+00:00 + duration_ms: 62.661 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs_vacuum-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs_vacuum-1.yaml new file mode 100644 index 000000000..56e94f0e9 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs_vacuum-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: 49e929bb-2534-490a-9606-737a30949635 +task_id: home1_us_vacuum-start_roborock_downstairs_vacuum-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock Downstairs vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock Downstairs vacuum + context: + id: 01KHJQXPMQV8PPCF31ANWT4PCG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:58.263971+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:58.265871+00:00 + - role: user + content: Start Roborock Downstairs vacuum + attachments: null + created: 2026-02-16 08:08:58.264044+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e82cd50>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:58.265885+00:00 + duration_ms: 58.112 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs_vacuum-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs_vacuum-2.yaml new file mode 100644 index 000000000..eb5e04fd8 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs_vacuum-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: 186b1180-451f-41b3-9552-700fa3971e53 +task_id: home1_us_vacuum-start_roborock_downstairs_vacuum-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock Downstairs vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock Downstairs vacuum + context: + id: 01KHJQXPZCV2C478N0EPBWAJ63 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:58.604488+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:58.606977+00:00 + - role: user + content: Start Roborock Downstairs vacuum + attachments: null + created: 2026-02-16 08:08:58.604566+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16eb63c10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:58.606992+00:00 + duration_ms: 63.719 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs_vacuum-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs_vacuum-3.yaml new file mode 100644 index 000000000..67d8e6e42 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs_vacuum-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: fb81ed75-0197-43f7-bca5-105befd478f9 +task_id: home1_us_vacuum-start_roborock_downstairs_vacuum-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock Downstairs vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock Downstairs vacuum + context: + id: 01KHJQXQ9VMFVQSPZQQSQ65PX7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:58.939547+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:58.943308+00:00 + - role: user + content: Start Roborock Downstairs vacuum + attachments: null + created: 2026-02-16 08:08:58.939627+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d052770>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:58.943324+00:00 + duration_ms: 62.603 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs_vacuum-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs_vacuum-4.yaml new file mode 100644 index 000000000..8bdb0f3db --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_roborock_downstairs_vacuum-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: 53e5674d-13c3-486d-b3ef-e61e390b67ef +task_id: home1_us_vacuum-start_roborock_downstairs_vacuum-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start Roborock Downstairs vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start Roborock Downstairs vacuum + context: + id: 01KHJQXQM7QX1RBVPT0SB63F7C + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:59.271285+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:08:59.274836+00:00 + - role: user + content: Start Roborock Downstairs vacuum + attachments: null + created: 2026-02-16 08:08:59.271366+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d84b950>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:59.274851+00:00 + duration_ms: 63.957 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_the_vacuum-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_the_vacuum-0.yaml new file mode 100644 index 000000000..16b608fd5 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_the_vacuum-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: 7d4999c4-90ab-4e25-96a1-e2b76c1af979 +task_id: home1_us_vacuum-start_the_vacuum-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start the vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start the vacuum + context: + id: 01KHJQXZAGQ9TS0CB7B7J8JB06 + parent_id: null + user_id: null + conversation_id: null + device_id: 56d326eb6663d12087d0237301dec91b + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:07.152214+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:07.155863+00:00 + - role: user + content: Start the vacuum + attachments: null + created: 2026-02-16 08:09:07.152318+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d6dc250>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:07.155877+00:00 + duration_ms: 66.124 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_the_vacuum-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_the_vacuum-1.yaml new file mode 100644 index 000000000..951e005ef --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_the_vacuum-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: b03d9893-20d4-42ed-81be-bbd238d68b88 +task_id: home1_us_vacuum-start_the_vacuum-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start the vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start the vacuum + context: + id: 01KHJQXZMM2SBE8JSS99VXPGC5 + parent_id: null + user_id: null + conversation_id: null + device_id: 6be302494d04cfbd1ff50a2d32a7ac32 + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:07.476510+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:07.482368+00:00 + - role: user + content: Start the vacuum + attachments: null + created: 2026-02-16 08:09:07.476584+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c8aa8d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:07.482382+00:00 + duration_ms: 60.971 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_the_vacuum-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_the_vacuum-2.yaml new file mode 100644 index 000000000..d78d32143 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_the_vacuum-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: e08c0f14-82e6-4f69-bf91-1ebcd0405f79 +task_id: home1_us_vacuum-start_the_vacuum-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start the vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start the vacuum + context: + id: 01KHJQXZZ4M6P6KCQ69KDSFJ46 + parent_id: null + user_id: null + conversation_id: null + device_id: 8d9f0472a580344b5e8ce4c85f28bbee + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:07.812850+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:07.816646+00:00 + - role: user + content: Start the vacuum + attachments: null + created: 2026-02-16 08:09:07.812925+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fe49c70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:07.816660+00:00 + duration_ms: 61.128 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_the_vacuum-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_the_vacuum-3.yaml new file mode 100644 index 000000000..94d714959 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_the_vacuum-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: 859e1208-4488-436e-9e95-9d0cac05ab8d +task_id: home1_us_vacuum-start_the_vacuum-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start the vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start the vacuum + context: + id: 01KHJQY0ACN8EZGSBNZF9TR692 + parent_id: null + user_id: null + conversation_id: null + device_id: 15cfc399080937b1caf3a831df60618b + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:08.172847+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:08.174746+00:00 + - role: user + content: Start the vacuum + attachments: null + created: 2026-02-16 08:09:08.172922+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17de6c040>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:08.174758+00:00 + duration_ms: 64.963 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_the_vacuum-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_the_vacuum-4.yaml new file mode 100644 index 000000000..9aa4408d0 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_the_vacuum-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: b86d6bf2-e124-4d3f-8047-d2fa1aec067e +task_id: home1_us_vacuum-start_the_vacuum-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start the vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start the vacuum + context: + id: 01KHJQY0M6Q1CZ8TY6ZE9T2BQJ + parent_id: null + user_id: null + conversation_id: null + device_id: 14ee55e560990b6bdb2a74ef6a9f87cc + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:08.486897+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:08.488774+00:00 + - role: user + content: Start the vacuum + attachments: null + created: 2026-02-16 08:09:08.486969+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16deeda60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:08.488788+00:00 + duration_ms: 62.596 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_vacuuming-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_vacuuming-0.yaml new file mode 100644 index 000000000..8f0d61796 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_vacuuming-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: fffc897b-def1-40dc-954d-90543ae8905a +task_id: home1_us_vacuum-start_vacuuming-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start vacuuming + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start vacuuming + context: + id: 01KHJQXSRS96D6PQBX7V30XQEG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:01.465188+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:01.467158+00:00 + - role: user + content: Start vacuuming + attachments: null + created: 2026-02-16 08:09:01.465295+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17db43270>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:01.467171+00:00 + duration_ms: 58.335 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_vacuuming-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_vacuuming-1.yaml new file mode 100644 index 000000000..43066965d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_vacuuming-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: 6523110a-69af-4d3f-8bc0-56ddfa4cce85 +task_id: home1_us_vacuum-start_vacuuming-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start vacuuming + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start vacuuming + context: + id: 01KHJQXT34SAST672RHT223B4T + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:01.796629+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:01.798548+00:00 + - role: user + content: Start vacuuming + attachments: null + created: 2026-02-16 08:09:01.796705+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17de6dd20>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:01.798561+00:00 + duration_ms: 59.294 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_vacuuming-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_vacuuming-2.yaml new file mode 100644 index 000000000..e6d996212 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_vacuuming-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: 2e27038e-ce1b-4d5f-bedd-87d72802d14f +task_id: home1_us_vacuum-start_vacuuming-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start vacuuming + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start vacuuming + context: + id: 01KHJQXTDHT2KQ4FW8C97TTMWA + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:02.129381+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:02.131785+00:00 + - role: user + content: Start vacuuming + attachments: null + created: 2026-02-16 08:09:02.129458+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc1859be560>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:02.131800+00:00 + duration_ms: 67.75 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_vacuuming-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_vacuuming-3.yaml new file mode 100644 index 000000000..2e1305a7b --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_vacuuming-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: 06242906-632b-40bf-97ff-660795c03aee +task_id: home1_us_vacuum-start_vacuuming-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start vacuuming + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start vacuuming + context: + id: 01KHJQXTR5Z2DTQMWV0VKH5FG2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:02.469208+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:02.476960+00:00 + - role: user + content: Start vacuuming + attachments: null + created: 2026-02-16 08:09:02.469308+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e863ed0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:02.476979+00:00 + duration_ms: 76.956 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_vacuuming-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_vacuuming-4.yaml new file mode 100644 index 000000000..945e94997 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-start_vacuuming-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: 402c97a2-d1e8-423d-b3c0-2c78ed06419a +task_id: home1_us_vacuum-start_vacuuming-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Start vacuuming + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Start vacuuming + context: + id: 01KHJQXV361SXR04TXE680AK7V + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:02.822395+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:02.826153+00:00 + - role: user + content: Start vacuuming + attachments: null + created: 2026-02-16 08:09:02.822473+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e9d89e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:02.826169+00:00 + duration_ms: 64.4 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_roborock_downstairs-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_roborock_downstairs-0.yaml new file mode 100644 index 000000000..100e97a14 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_roborock_downstairs-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: 4aad1980-1b2d-4b4b-8e06-c35468548ff5 +task_id: home1_us_vacuum-stop_roborock_downstairs-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop Roborock Downstairs + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop Roborock Downstairs + context: + id: 01KHJQYC8JA5R88X8D8PT6AXNK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:20.402610+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:20.404533+00:00 + - role: user + content: Stop Roborock Downstairs + attachments: null + created: 2026-02-16 08:09:20.402684+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d0b0b40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:20.404546+00:00 + duration_ms: 61.431 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_roborock_downstairs-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_roborock_downstairs-1.yaml new file mode 100644 index 000000000..08c19a8ce --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_roborock_downstairs-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: f66555a8-4c7e-4726-862c-cbcce9a428d1 +task_id: home1_us_vacuum-stop_roborock_downstairs-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop Roborock Downstairs + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop Roborock Downstairs + context: + id: 01KHJQYCKB6R9SPQRRFZX9TJW7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:20.747936+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:20.754760+00:00 + - role: user + content: Stop Roborock Downstairs + attachments: null + created: 2026-02-16 08:09:20.748012+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16cbb1a60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:20.754778+00:00 + duration_ms: 65.828 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_roborock_downstairs-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_roborock_downstairs-2.yaml new file mode 100644 index 000000000..cc3abddf9 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_roborock_downstairs-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: b0ba5415-43c9-4895-86b1-ff6d08e55a1b +task_id: home1_us_vacuum-stop_roborock_downstairs-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop Roborock Downstairs + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop Roborock Downstairs + context: + id: 01KHJQYCXK3XZWFXM1X4VKY3MS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:21.075427+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:21.077216+00:00 + - role: user + content: Stop Roborock Downstairs + attachments: null + created: 2026-02-16 08:09:21.075504+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d8af740>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:21.077242+00:00 + duration_ms: 56.864 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_roborock_downstairs-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_roborock_downstairs-3.yaml new file mode 100644 index 000000000..e5f36d692 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_roborock_downstairs-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: e008d385-b7c0-4536-a1e7-cf7d595e55e2 +task_id: home1_us_vacuum-stop_roborock_downstairs-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop Roborock Downstairs + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop Roborock Downstairs + context: + id: 01KHJQYD79S0GXAD64YZGZ6N19 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:21.385369+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:21.388723+00:00 + - role: user + content: Stop Roborock Downstairs + attachments: null + created: 2026-02-16 08:09:21.385446+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ea9ada0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:21.388738+00:00 + duration_ms: 62.275 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_roborock_downstairs-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_roborock_downstairs-4.yaml new file mode 100644 index 000000000..2f0354a41 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_roborock_downstairs-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: 4dd8d808-e1d8-4793-896e-0b70a3f97f10 +task_id: home1_us_vacuum-stop_roborock_downstairs-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop Roborock Downstairs + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop Roborock Downstairs + context: + id: 01KHJQYDK78X4CX50XGKCDBZJV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:21.767468+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:21.769378+00:00 + - role: user + content: Stop Roborock Downstairs + attachments: null + created: 2026-02-16 08:09:21.767543+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d926090>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:21.769391+00:00 + duration_ms: 66.004 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_downstairs_vacuum-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_downstairs_vacuum-0.yaml new file mode 100644 index 000000000..63916fe2c --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_downstairs_vacuum-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: 25ea2aff-c289-40f5-b3d9-310ecffde9a9 +task_id: home1_us_vacuum-stop_the_downstairs_vacuum-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop the downstairs vacuum + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop the downstairs vacuum + context: + id: 01KHJQYHWW8HW7XVYV8XPW0SK9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:26.172144+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:26.183916+00:00 + - role: user + content: Stop the downstairs vacuum + attachments: null + created: 2026-02-16 08:09:26.172250+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d022f00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:26.183937+00:00 + duration_ms: 72.091 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_downstairs_vacuum-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_downstairs_vacuum-1.yaml new file mode 100644 index 000000000..4353074bc --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_downstairs_vacuum-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: daa0ab7e-8c59-46ee-acdf-f2601c287d33 +task_id: home1_us_vacuum-stop_the_downstairs_vacuum-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop the downstairs vacuum + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop the downstairs vacuum + context: + id: 01KHJQYJ6SEKVMM2WDZAE673DM + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:26.490088+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:26.491992+00:00 + - role: user + content: Stop the downstairs vacuum + attachments: null + created: 2026-02-16 08:09:26.490164+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d8ad850>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:26.492006+00:00 + duration_ms: 156.018 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_downstairs_vacuum-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_downstairs_vacuum-2.yaml new file mode 100644 index 000000000..ab709ce0e --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_downstairs_vacuum-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: cbd5ab18-20dd-4f58-9001-4dc3807749ae +task_id: home1_us_vacuum-stop_the_downstairs_vacuum-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop the downstairs vacuum + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop the downstairs vacuum + context: + id: 01KHJQYJKHVZN4NCZNE481GZ35 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:26.897279+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:26.899091+00:00 + - role: user + content: Stop the downstairs vacuum + attachments: null + created: 2026-02-16 08:09:26.897354+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fe2c670>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:26.899104+00:00 + duration_ms: 151.787 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_downstairs_vacuum-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_downstairs_vacuum-3.yaml new file mode 100644 index 000000000..ae147c2f8 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_downstairs_vacuum-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: ec7ca65a-f010-46e6-9604-f3f0e15c43f8 +task_id: home1_us_vacuum-stop_the_downstairs_vacuum-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop the downstairs vacuum + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop the downstairs vacuum + context: + id: 01KHJQYK0HA4DJF40XMK86PXQV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:27.313978+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:27.319355+00:00 + - role: user + content: Stop the downstairs vacuum + attachments: null + created: 2026-02-16 08:09:27.314053+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c7afd70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:27.319371+00:00 + duration_ms: 142.877 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_downstairs_vacuum-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_downstairs_vacuum-4.yaml new file mode 100644 index 000000000..8332e8470 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_downstairs_vacuum-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: 7192bd99-62a8-49de-9bba-531a2c5bbd50 +task_id: home1_us_vacuum-stop_the_downstairs_vacuum-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop the downstairs vacuum + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop the downstairs vacuum + context: + id: 01KHJQYKE7EYFSSP9W526HQG36 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:27.751267+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:27.753128+00:00 + - role: user + content: Stop the downstairs vacuum + attachments: null + created: 2026-02-16 08:09:27.751355+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d358510>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:27.753141+00:00 + duration_ms: 155.064 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_vacuum-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_vacuum-0.yaml new file mode 100644 index 000000000..175c7473b --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_vacuum-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: 4815e79c-24bc-4d1f-a201-98968c441399 +task_id: home1_us_vacuum-stop_the_vacuum-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop the vacuum + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop the vacuum + context: + id: 01KHJQYDXKSC02ADK6B08GDA2C + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:22.099697+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:22.101539+00:00 + - role: user + content: Stop the vacuum + attachments: null + created: 2026-02-16 08:09:22.099773+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d443530>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:22.101552+00:00 + duration_ms: 63.398 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_vacuum-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_vacuum-1.yaml new file mode 100644 index 000000000..724c2645c --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_vacuum-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: 9040d5e5-eb2f-454c-9ce5-c028c859a25d +task_id: home1_us_vacuum-stop_the_vacuum-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop the vacuum + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop the vacuum + context: + id: 01KHJQYE7QGEWW3QSATQTB7VRB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:22.423422+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:22.425669+00:00 + - role: user + content: Stop the vacuum + attachments: null + created: 2026-02-16 08:09:22.423498+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e9d9e80>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:22.425682+00:00 + duration_ms: 62.326 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_vacuum-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_vacuum-2.yaml new file mode 100644 index 000000000..b0d339018 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_vacuum-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: ed015fa8-caf5-44de-b7bf-3b3ef91cbc7f +task_id: home1_us_vacuum-stop_the_vacuum-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop the vacuum + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop the vacuum + context: + id: 01KHJQYEJ0N2108GBBC8GTPPNV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:22.752849+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:22.756414+00:00 + - role: user + content: Stop the vacuum + attachments: null + created: 2026-02-16 08:09:22.752922+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dcda8d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:22.756428+00:00 + duration_ms: 67.035 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_vacuum-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_vacuum-3.yaml new file mode 100644 index 000000000..ccfdcd0b9 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_vacuum-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: b5602c5e-57e9-4126-8619-f6192768785c +task_id: home1_us_vacuum-stop_the_vacuum-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop the vacuum + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop the vacuum + context: + id: 01KHJQYEW8T46PVHXWNC6E80T8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:23.080607+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:23.083094+00:00 + - role: user + content: Stop the vacuum + attachments: null + created: 2026-02-16 08:09:23.080679+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d0bc040>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:23.083108+00:00 + duration_ms: 148.028 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_vacuum-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_vacuum-4.yaml new file mode 100644 index 000000000..f6c9e1963 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_the_vacuum-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: 8f8c8ed0-adfa-40ac-9463-fdbd0e3a1ffd +task_id: home1_us_vacuum-stop_the_vacuum-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop the vacuum + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop the vacuum + context: + id: 01KHJQYF95TQS8D4PGE05FYE2G + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:23.493713+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:23.497367+00:00 + - role: user + content: Stop the vacuum + attachments: null + created: 2026-02-16 08:09:23.493787+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fd1c460>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:23.497383+00:00 + duration_ms: 353.456 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_vacuuming-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_vacuuming-0.yaml new file mode 100644 index 000000000..e98894f22 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_vacuuming-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: a05f565c-25b4-4200-8a6a-f5973eef0fef +task_id: home1_us_vacuum-stop_vacuuming-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop vacuuming + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop vacuuming + context: + id: 01KHJQYFWKWG50YG0TTKVFQ526 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:24.115812+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:24.117680+00:00 + - role: user + content: Stop vacuuming + attachments: null + created: 2026-02-16 08:09:24.115885+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d7b2ae0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:24.117693+00:00 + duration_ms: 57.314 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_vacuuming-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_vacuuming-1.yaml new file mode 100644 index 000000000..60e4798e3 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_vacuuming-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: bc96f6cc-7bc5-4624-a350-1f33be8b237e +task_id: home1_us_vacuum-stop_vacuuming-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop vacuuming + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop vacuuming + context: + id: 01KHJQYG6JFCN0M5CQ9FHAS63F + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:24.434593+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:24.437485+00:00 + - role: user + content: Stop vacuuming + attachments: null + created: 2026-02-16 08:09:24.434668+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e7c14e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:24.437499+00:00 + duration_ms: 155.603 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_vacuuming-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_vacuuming-2.yaml new file mode 100644 index 000000000..f7932aa8a --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_vacuuming-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: b852c074-2385-456b-8d7b-250a9d5fcfc5 +task_id: home1_us_vacuum-stop_vacuuming-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop vacuuming + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop vacuuming + context: + id: 01KHJQYGKHTNNSQ909VH1KFAWS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:24.849580+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:24.851479+00:00 + - role: user + content: Stop vacuuming + attachments: null + created: 2026-02-16 08:09:24.849653+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d006140>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:24.851491+00:00 + duration_ms: 59.448 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_vacuuming-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_vacuuming-3.yaml new file mode 100644 index 000000000..bfe46a376 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_vacuuming-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: 41aac126-5d68-45e3-aaf8-373484f2226a +task_id: home1_us_vacuum-stop_vacuuming-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop vacuuming + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop vacuuming + context: + id: 01KHJQYH4T27SHK9YG69F0XHRH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:25.402321+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:25.405976+00:00 + - role: user + content: Stop vacuuming + attachments: null + created: 2026-02-16 08:09:25.402394+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16eb68d50>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:25.405989+00:00 + duration_ms: 155.641 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_vacuuming-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_vacuuming-4.yaml new file mode 100644 index 000000000..fe5b07963 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-stop_vacuuming-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: c390cbf6-6c14-4719-bf4f-fc21c66e03e4 +task_id: home1_us_vacuum-stop_vacuuming-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Stop vacuuming + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Stop vacuuming + context: + id: 01KHJQYHJ1WPWP03VWEAJ9WZQE + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:25.825699+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:25.829397+00:00 + - role: user + content: Stop vacuuming + attachments: null + created: 2026-02-16 08:09:25.825774+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d5eb060>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:25.829412+00:00 + duration_ms: 60.689 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-turn_on_the_vacuum-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-turn_on_the_vacuum-0.yaml new file mode 100644 index 000000000..beb42e113 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-turn_on_the_vacuum-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: 0b2a3550-4707-4c02-9c85-5b9683dac3fe +task_id: home1_us_vacuum-turn_on_the_vacuum-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Turn on the vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on the vacuum + context: + id: 01KHJQY3C65HP1YTJJTJR7NHYP + parent_id: null + user_id: null + conversation_id: null + device_id: 307327317c6457b87c682ab6008e6782 + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:11.302374+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:11.307455+00:00 + - role: user + content: Turn on the vacuum + attachments: null + created: 2026-02-16 08:09:11.302451+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16dc710c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:11.307470+00:00 + duration_ms: 61.717 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-turn_on_the_vacuum-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-turn_on_the_vacuum-1.yaml new file mode 100644 index 000000000..da6d3ec84 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-turn_on_the_vacuum-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: e351b039-114a-4b0a-bda4-0c284fe2b681 +task_id: home1_us_vacuum-turn_on_the_vacuum-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Turn on the vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on the vacuum + context: + id: 01KHJQY3PDGB9RH2CQRGPRXY57 + parent_id: null + user_id: null + conversation_id: null + device_id: 191cf7e95eb693e7228d49645045deec + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:11.629303+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:11.631237+00:00 + - role: user + content: Turn on the vacuum + attachments: null + created: 2026-02-16 08:09:11.629380+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d61b480>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:11.631249+00:00 + duration_ms: 58.245 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-turn_on_the_vacuum-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-turn_on_the_vacuum-2.yaml new file mode 100644 index 000000000..443440053 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-turn_on_the_vacuum-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: 74ce9341-1b37-4d3a-977e-374e9c948da2 +task_id: home1_us_vacuum-turn_on_the_vacuum-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Turn on the vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on the vacuum + context: + id: 01KHJQY40RREPD9KASX5HZ6HZK + parent_id: null + user_id: null + conversation_id: null + device_id: 71b1e42a20c95c1c633cbbc667c169c8 + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:11.960376+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:11.962269+00:00 + - role: user + content: Turn on the vacuum + attachments: null + created: 2026-02-16 08:09:11.960453+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d0f3b60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:11.962281+00:00 + duration_ms: 142.156 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-turn_on_the_vacuum-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-turn_on_the_vacuum-3.yaml new file mode 100644 index 000000000..fa024a5c6 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-turn_on_the_vacuum-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: e0acd7d3-00f0-4c4b-b0c7-b94e4d07f885 +task_id: home1_us_vacuum-turn_on_the_vacuum-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Turn on the vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on the vacuum + context: + id: 01KHJQY4DHRCH6479C7VR6SK83 + parent_id: null + user_id: null + conversation_id: null + device_id: 47f0f0231b5e67d7efbe47f9fc5e235c + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:12.369184+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:12.372725+00:00 + - role: user + content: Turn on the vacuum + attachments: null + created: 2026-02-16 08:09:12.369292+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16eb5a820>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:12.372739+00:00 + duration_ms: 163.065 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-turn_on_the_vacuum-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-turn_on_the_vacuum-4.yaml new file mode 100644 index 000000000..f28885b4f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-turn_on_the_vacuum-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: bc0c60f7-0406-4bb2-a1a6-d85426122eed +task_id: home1_us_vacuum-turn_on_the_vacuum-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Turn on the vacuum + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Turn on the vacuum + context: + id: 01KHJQY4WQD4H46H578VSMR318 + parent_id: null + user_id: null + conversation_id: null + device_id: 7ba3b7edb76644766937c0e7b0fa00c1 + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:12.855907+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + You are in area Living Room and all generic commands like 'turn on the lights' should target this area. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:12.857833+00:00 + - role: user + content: Turn on the vacuum + attachments: null + created: 2026-02-16 08:09:12.855983+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d9f77f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:12.857847+00:00 + duration_ms: 142.319 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_downstairs-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_downstairs-0.yaml new file mode 100644 index 000000000..59d34fc15 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_downstairs-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: c4795f0a-1b4f-4e79-9048-7eb6255108b3 +task_id: home1_us_vacuum-vacuum_downstairs-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Vacuum downstairs + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Vacuum downstairs + context: + id: 01KHJQXVDRC48TGCR5Z1646RVH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:03.160893+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:03.163723+00:00 + - role: user + content: Vacuum downstairs + attachments: null + created: 2026-02-16 08:09:03.160972+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16dd96820>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:03.163739+00:00 + duration_ms: 145.254 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_downstairs-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_downstairs-1.yaml new file mode 100644 index 000000000..a358a6e14 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_downstairs-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: 657b8945-752a-4934-b7bf-b34040687021 +task_id: home1_us_vacuum-vacuum_downstairs-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Vacuum downstairs + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Vacuum downstairs + context: + id: 01KHJQXVW33JP8BCB28PCV0NGA + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:03.619940+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:03.621884+00:00 + - role: user + content: Vacuum downstairs + attachments: null + created: 2026-02-16 08:09:03.620016+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ff217a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:03.621898+00:00 + duration_ms: 64.647 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_downstairs-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_downstairs-2.yaml new file mode 100644 index 000000000..14b9d6acc --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_downstairs-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: 9db1c682-4675-49e2-881f-d786a256350e +task_id: home1_us_vacuum-vacuum_downstairs-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Vacuum downstairs + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Vacuum downstairs + context: + id: 01KHJQXW6N678GBFYCZ32QDN5C + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:03.957681+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:03.959592+00:00 + - role: user + content: Vacuum downstairs + attachments: null + created: 2026-02-16 08:09:03.957758+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d152770>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:03.959606+00:00 + duration_ms: 154.484 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_downstairs-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_downstairs-3.yaml new file mode 100644 index 000000000..9cd470d8d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_downstairs-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: 31a650fb-063c-44b8-b6f1-bcbc9de5bfd5 +task_id: home1_us_vacuum-vacuum_downstairs-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Vacuum downstairs + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Vacuum downstairs + context: + id: 01KHJQXWK35VCYXSGQZD87R5XH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:04.355484+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:04.358123+00:00 + - role: user + content: Vacuum downstairs + attachments: null + created: 2026-02-16 08:09:04.355564+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e71a770>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:04.358139+00:00 + duration_ms: 149.786 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_downstairs-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_downstairs-4.yaml new file mode 100644 index 000000000..b102811ac --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_downstairs-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: 99076582-869d-48e3-98f8-38a87b15fcbd +task_id: home1_us_vacuum-vacuum_downstairs-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Vacuum downstairs + expect_changes: + vacuum.roborock_downstairs: + state: cleaning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: cleaning + got: + state: 'off' + conversation_trace: + - event_type: async_process + data: + text: Vacuum downstairs + context: + id: 01KHJQXWZZ580JPE1DR15T08GJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:04.767607+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:04.770531+00:00 + - role: user + content: Vacuum downstairs + attachments: null + created: 2026-02-16 08:09:04.767686+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fd42c40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:04.770545+00:00 + duration_ms: 160.014 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_return_to_base-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_return_to_base-0.yaml new file mode 100644 index 000000000..650685092 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_return_to_base-0.yaml @@ -0,0 +1,295 @@ +--- +uuid: e2ec80c3-b7f7-4ff0-85e6-d1fb9341c42d +task_id: home1_us_vacuum-vacuum_return_to_base-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Vacuum return to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Vacuum return to base + context: + id: 01KHJQYAFDQDHWM6J0F5FTSK02 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:18.573315+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:18.576934+00:00 + - role: user + content: Vacuum return to base + attachments: null + created: 2026-02-16 08:09:18.573393+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c8b3530>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:18.576949+00:00 + duration_ms: 165.856 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_return_to_base-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_return_to_base-1.yaml new file mode 100644 index 000000000..e0c457730 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_return_to_base-1.yaml @@ -0,0 +1,295 @@ +--- +uuid: 6e30ceda-89fb-420e-b797-64a084e97578 +task_id: home1_us_vacuum-vacuum_return_to_base-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Vacuum return to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Vacuum return to base + context: + id: 01KHJQYAXWVX6R8Y8AYWEVHMX2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:19.036358+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:19.038288+00:00 + - role: user + content: Vacuum return to base + attachments: null + created: 2026-02-16 08:09:19.036437+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dcdefb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:19.038300+00:00 + duration_ms: 144.358 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_return_to_base-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_return_to_base-2.yaml new file mode 100644 index 000000000..821fec658 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_return_to_base-2.yaml @@ -0,0 +1,295 @@ +--- +uuid: 5ed922cf-7148-4dd5-aac1-16f8adb9e995 +task_id: home1_us_vacuum-vacuum_return_to_base-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Vacuum return to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Vacuum return to base + context: + id: 01KHJQYBA0PAJVQBYVBTMM4SPG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:19.424893+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:19.426778+00:00 + - role: user + content: Vacuum return to base + attachments: null + created: 2026-02-16 08:09:19.424970+00:00 + tools: + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d330eb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:19.426793+00:00 + duration_ms: 57.881 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_return_to_base-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_return_to_base-3.yaml new file mode 100644 index 000000000..7dbc75308 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_return_to_base-3.yaml @@ -0,0 +1,295 @@ +--- +uuid: 3b4f2ce4-44af-41a7-b0e3-2786001d2110 +task_id: home1_us_vacuum-vacuum_return_to_base-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Vacuum return to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Vacuum return to base + context: + id: 01KHJQYBKR7DYE776H52WH5TRS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:19.736702+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:19.738534+00:00 + - role: user + content: Vacuum return to base + attachments: null + created: 2026-02-16 08:09:19.736781+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fc77d70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:19.738546+00:00 + duration_ms: 62.583 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_return_to_base-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_return_to_base-4.yaml new file mode 100644 index 000000000..01143db47 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home1_us_vacuum-vacuum_return_to_base-4.yaml @@ -0,0 +1,295 @@ +--- +uuid: 8cf25f32-452c-4eaa-996b-68ba1ec9453b +task_id: home1_us_vacuum-vacuum_return_to_base-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: Vacuum return to base + expect_changes: + vacuum.roborock_downstairs: + state: returning + attributes: null +response: Error talking to API +context: + unexpected_states: + vacuum.roborock_downstairs: + expected: + state: returning + got: + state: cleaning + conversation_trace: + - event_type: async_process + data: + text: Vacuum return to base + context: + id: 01KHJQYBYAXDPCPW3EEFX9KN74 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:20.074754+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Camera Motion + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Person + domain: binary_sensor + areas: Backyard + - names: Backyard Camera Sound + domain: binary_sensor + areas: Backyard + - names: Backyard Light + domain: light + areas: Backyard + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Game Room Light + domain: light + areas: Game Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Motion Sensor + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: binary_sensor + areas: Living Room + - names: Living Room Motion Sensor Battery + domain: sensor + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Smart Blinds + domain: cover + areas: Master Bedroom + - names: Master Bedroom Smart Blinds Battery + domain: sensor + areas: Master Bedroom + - names: Nest Hub + domain: media_player + areas: Living Room + - names: Rear Door Lock + domain: lock + areas: Garage + - names: Roborock Downstairs + domain: vacuum + areas: Living Room + - names: Smart Lock + domain: binary_sensor + areas: Entry + - names: Smart Lock + domain: lock + areas: Entry + - names: Smart Lock Battery + domain: binary_sensor + areas: Entry + - names: Smart Lock Battery + domain: sensor + areas: Entry + - names: Smart Lock Tamper + domain: binary_sensor + areas: Entry + - names: Smart Speaker + domain: media_player + areas: Game Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:20.077123+00:00 + - role: user + content: Vacuum return to base + attachments: null + created: 2026-02-16 08:09:20.074830+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e8e7ab0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:20.077137+00:00 + duration_ms: 60.652 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_front_yard_valve-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_front_yard_valve-0.yaml new file mode 100644 index 000000000..f106b8ff5 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_front_yard_valve-0.yaml @@ -0,0 +1,199 @@ +--- +uuid: 0b64faac-e31a-4615-b812-7b619a89a2da +task_id: home2_ru_water_valve-close_the_front_yard_valve-0 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the front yard valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the front yard valve + context: + id: 01KHJQYVRK9NN5GGJPHPGVC89Y + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:36.275525+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:36.278565+00:00 + - role: user + content: close the front yard valve + attachments: null + created: 2026-02-16 08:09:36.275600+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:36.278579+00:00 + duration_ms: 43.647 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_front_yard_valve-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_front_yard_valve-1.yaml new file mode 100644 index 000000000..8e4c0e297 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_front_yard_valve-1.yaml @@ -0,0 +1,199 @@ +--- +uuid: 6826c3db-9a2a-44a1-b55f-1d150eb908b8 +task_id: home2_ru_water_valve-close_the_front_yard_valve-1 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the front yard valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the front yard valve + context: + id: 01KHJQYW1QQ4B1Z3S6GWS968QR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:36.567957+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:36.571114+00:00 + - role: user + content: close the front yard valve + attachments: null + created: 2026-02-16 08:09:36.568035+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:36.571129+00:00 + duration_ms: 45.228 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_front_yard_valve-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_front_yard_valve-2.yaml new file mode 100644 index 000000000..efe2fd041 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_front_yard_valve-2.yaml @@ -0,0 +1,199 @@ +--- +uuid: 4d4864ed-d962-4adb-b82f-8cb05fa0d4dc +task_id: home2_ru_water_valve-close_the_front_yard_valve-2 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the front yard valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the front yard valve + context: + id: 01KHJQYWATCXG62VWTQXJ809WN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:36.859071+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:36.860254+00:00 + - role: user + content: close the front yard valve + attachments: null + created: 2026-02-16 08:09:36.859144+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:36.860266+00:00 + duration_ms: 122.967 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_front_yard_valve-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_front_yard_valve-3.yaml new file mode 100644 index 000000000..a9b3bb4ed --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_front_yard_valve-3.yaml @@ -0,0 +1,199 @@ +--- +uuid: ce30fd92-c7ae-4660-b47c-ba5b9afbd865 +task_id: home2_ru_water_valve-close_the_front_yard_valve-3 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the front yard valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the front yard valve + context: + id: 01KHJQYWQX8WF333E2AA0RX1PM + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:37.278091+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:37.289102+00:00 + - role: user + content: close the front yard valve + attachments: null + created: 2026-02-16 08:09:37.278165+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:37.289123+00:00 + duration_ms: 52.657 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_front_yard_valve-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_front_yard_valve-4.yaml new file mode 100644 index 000000000..d591da908 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_front_yard_valve-4.yaml @@ -0,0 +1,199 @@ +--- +uuid: 9e965fe7-89cb-43e8-ae00-ccd0235ce078 +task_id: home2_ru_water_valve-close_the_front_yard_valve-4 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the front yard valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the front yard valve + context: + id: 01KHJQYX0H6VNN0FX0JFEQY86N + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:37.553753+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:37.554870+00:00 + - role: user + content: close the front yard valve + attachments: null + created: 2026-02-16 08:09:37.553827+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:37.554882+00:00 + duration_ms: 126.478 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_irrigation_valve-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_irrigation_valve-0.yaml new file mode 100644 index 000000000..a4e21c0fd --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_irrigation_valve-0.yaml @@ -0,0 +1,199 @@ +--- +uuid: e10768df-99cf-4721-950e-34aa70949182 +task_id: home2_ru_water_valve-close_the_irrigation_valve-0 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the irrigation valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the irrigation valve + context: + id: 01KHJQYXBHETCE88YFBNMDP54W + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:37.905923+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:37.907019+00:00 + - role: user + content: close the irrigation valve + attachments: null + created: 2026-02-16 08:09:37.905998+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:37.907031+00:00 + duration_ms: 124.264 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_irrigation_valve-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_irrigation_valve-1.yaml new file mode 100644 index 000000000..eed93f1b2 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_irrigation_valve-1.yaml @@ -0,0 +1,199 @@ +--- +uuid: 087a4777-9d60-4aa7-af74-0e50afc7978a +task_id: home2_ru_water_valve-close_the_irrigation_valve-1 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the irrigation valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the irrigation valve + context: + id: 01KHJQYXPJFHEVAN9MXHPM0KQA + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:38.258896+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:38.260020+00:00 + - role: user + content: close the irrigation valve + attachments: null + created: 2026-02-16 08:09:38.258974+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:38.260032+00:00 + duration_ms: 142.184 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_irrigation_valve-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_irrigation_valve-2.yaml new file mode 100644 index 000000000..ada1aa32b --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_irrigation_valve-2.yaml @@ -0,0 +1,199 @@ +--- +uuid: 244beaad-60cd-45af-aaca-d56296560ae9 +task_id: home2_ru_water_valve-close_the_irrigation_valve-2 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the irrigation valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the irrigation valve + context: + id: 01KHJQYY2NW4RDX65C2D1NP40Q + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:38.645157+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:38.646288+00:00 + - role: user + content: close the irrigation valve + attachments: null + created: 2026-02-16 08:09:38.645258+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:38.646299+00:00 + duration_ms: 39.4 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_irrigation_valve-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_irrigation_valve-3.yaml new file mode 100644 index 000000000..3c7ac0f65 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_irrigation_valve-3.yaml @@ -0,0 +1,199 @@ +--- +uuid: 2c10cce0-2f74-4b43-811f-345379425ebc +task_id: home2_ru_water_valve-close_the_irrigation_valve-3 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the irrigation valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the irrigation valve + context: + id: 01KHJQYYBDWH3K2WVTFEZCJPFP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:38.925273+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:38.926913+00:00 + - role: user + content: close the irrigation valve + attachments: null + created: 2026-02-16 08:09:38.925360+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:38.926927+00:00 + duration_ms: 37.291 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_irrigation_valve-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_irrigation_valve-4.yaml new file mode 100644 index 000000000..5e64d43c6 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-close_the_irrigation_valve-4.yaml @@ -0,0 +1,199 @@ +--- +uuid: 79dc501e-41e8-4403-bff2-bc6079ee4442 +task_id: home2_ru_water_valve-close_the_irrigation_valve-4 +model_id: gemma-3-27b-it +category: valve +task: + input_text: close the irrigation valve + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: close the irrigation valve + context: + id: 01KHJQYYMWRHAP7YFKCY95314S + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:39.228406+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:39.229550+00:00 + - role: user + content: close the irrigation valve + attachments: null + created: 2026-02-16 08:09:39.228482+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:39.229562+00:00 + duration_ms: 38.655 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_front_yard_valve-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_front_yard_valve-0.yaml new file mode 100644 index 000000000..28eb3b37b --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_front_yard_valve-0.yaml @@ -0,0 +1,199 @@ +--- +uuid: e00f89ad-29c0-49bd-b942-be023738e21b +task_id: home2_ru_water_valve-open_the_front_yard_valve-0 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the front yard valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the front yard valve + context: + id: 01KHJQYKTT31D7JY1ZV0SK3DB7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:28.155050+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:28.159517+00:00 + - role: user + content: open the front yard valve + attachments: null + created: 2026-02-16 08:09:28.155128+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:28.159534+00:00 + duration_ms: 39.987 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_front_yard_valve-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_front_yard_valve-1.yaml new file mode 100644 index 000000000..626d97dd6 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_front_yard_valve-1.yaml @@ -0,0 +1,199 @@ +--- +uuid: ecf81f59-5404-4246-af73-c9bce0a2098e +task_id: home2_ru_water_valve-open_the_front_yard_valve-1 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the front yard valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the front yard valve + context: + id: 01KHJQYM49FYGB1HC75K0AENE7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:28.457717+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:28.462467+00:00 + - role: user + content: open the front yard valve + attachments: null + created: 2026-02-16 08:09:28.457793+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:28.462483+00:00 + duration_ms: 49.486 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_front_yard_valve-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_front_yard_valve-2.yaml new file mode 100644 index 000000000..5a116b060 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_front_yard_valve-2.yaml @@ -0,0 +1,199 @@ +--- +uuid: 5952084a-4d36-4dcf-81c3-c9809d648318 +task_id: home2_ru_water_valve-open_the_front_yard_valve-2 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the front yard valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the front yard valve + context: + id: 01KHJQYMD0DWFCNHDCZC7ZMRP4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:28.736425+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:28.737940+00:00 + - role: user + content: open the front yard valve + attachments: null + created: 2026-02-16 08:09:28.736497+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:28.737953+00:00 + duration_ms: 40.262 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_front_yard_valve-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_front_yard_valve-3.yaml new file mode 100644 index 000000000..1bef58579 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_front_yard_valve-3.yaml @@ -0,0 +1,199 @@ +--- +uuid: dfdd04ca-2a09-48d7-a2fa-3aac1c1df451 +task_id: home2_ru_water_valve-open_the_front_yard_valve-3 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the front yard valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the front yard valve + context: + id: 01KHJQYMNJ5CBAK31RCTE2W8PY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:29.010574+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:29.013171+00:00 + - role: user + content: open the front yard valve + attachments: null + created: 2026-02-16 08:09:29.010646+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:29.013185+00:00 + duration_ms: 43.735 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_front_yard_valve-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_front_yard_valve-4.yaml new file mode 100644 index 000000000..e7b99eeec --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_front_yard_valve-4.yaml @@ -0,0 +1,199 @@ +--- +uuid: d99c77f4-3286-46a6-a7bd-c8c47ee38346 +task_id: home2_ru_water_valve-open_the_front_yard_valve-4 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the front yard valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the front yard valve + context: + id: 01KHJQYN0GTFCQR4Z6PYSGRF67 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:29.361031+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:29.362238+00:00 + - role: user + content: open the front yard valve + attachments: null + created: 2026-02-16 08:09:29.361104+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:29.362251+00:00 + duration_ms: 43.666 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_irrigation_valve-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_irrigation_valve-0.yaml new file mode 100644 index 000000000..687ef73cb --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_irrigation_valve-0.yaml @@ -0,0 +1,199 @@ +--- +uuid: ac059b76-f3bb-4562-9aa6-7d8c43f4f609 +task_id: home2_ru_water_valve-open_the_irrigation_valve-0 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the irrigation valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the irrigation valve + context: + id: 01KHJQYN8QFJBWKZENJR1NNWKS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:29.623195+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:29.624358+00:00 + - role: user + content: open the irrigation valve + attachments: null + created: 2026-02-16 08:09:29.623294+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:29.624370+00:00 + duration_ms: 45.182 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_irrigation_valve-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_irrigation_valve-1.yaml new file mode 100644 index 000000000..3d2b20f9c --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_irrigation_valve-1.yaml @@ -0,0 +1,199 @@ +--- +uuid: d6e641d3-470c-4e3e-9856-2e9e9968b9b4 +task_id: home2_ru_water_valve-open_the_irrigation_valve-1 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the irrigation valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the irrigation valve + context: + id: 01KHJQYNHJSK2E5VC54JVF1G5P + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:29.906897+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:29.908415+00:00 + - role: user + content: open the irrigation valve + attachments: null + created: 2026-02-16 08:09:29.906973+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:29.908429+00:00 + duration_ms: 37.566 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_irrigation_valve-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_irrigation_valve-2.yaml new file mode 100644 index 000000000..682aba978 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_irrigation_valve-2.yaml @@ -0,0 +1,199 @@ +--- +uuid: 6cf672bc-93c9-47c1-a875-784e2b7d1baf +task_id: home2_ru_water_valve-open_the_irrigation_valve-2 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the irrigation valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the irrigation valve + context: + id: 01KHJQYNT1VEJRC43RFCZQ3VVP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:30.177591+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:30.179146+00:00 + - role: user + content: open the irrigation valve + attachments: null + created: 2026-02-16 08:09:30.177667+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:30.179159+00:00 + duration_ms: 37.259 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_irrigation_valve-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_irrigation_valve-3.yaml new file mode 100644 index 000000000..fe4010444 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_irrigation_valve-3.yaml @@ -0,0 +1,199 @@ +--- +uuid: 82c256e5-43f6-4a59-b6d0-bb2b3e42adda +task_id: home2_ru_water_valve-open_the_irrigation_valve-3 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the irrigation valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the irrigation valve + context: + id: 01KHJQYP360M19JPQDGK0GR0E3 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:30.470419+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:30.476754+00:00 + - role: user + content: open the irrigation valve + attachments: null + created: 2026-02-16 08:09:30.470494+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:30.476773+00:00 + duration_ms: 133.69 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_irrigation_valve-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_irrigation_valve-4.yaml new file mode 100644 index 000000000..12915d002 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-open_the_irrigation_valve-4.yaml @@ -0,0 +1,199 @@ +--- +uuid: 897d7477-489d-403a-974b-2dcf03c6f3c7 +task_id: home2_ru_water_valve-open_the_irrigation_valve-4 +model_id: gemma-3-27b-it +category: valve +task: + input_text: open the irrigation valve + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: open the irrigation valve + context: + id: 01KHJQYPENB8P6YN3RDDEY3V7K + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:30.837668+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:30.838815+00:00 + - role: user + content: open the irrigation valve + attachments: null + created: 2026-02-16 08:09:30.837741+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:30.838829+00:00 + duration_ms: 42.508 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-set_the_irrigation_valve_to_50-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-set_the_irrigation_valve_to_50-0.yaml new file mode 100644 index 000000000..4a0c88140 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-set_the_irrigation_valve_to_50-0.yaml @@ -0,0 +1,199 @@ +--- +uuid: b524326d-d6f5-4152-8da9-85d6b490e1f2 +task_id: home2_ru_water_valve-set_the_irrigation_valve_to_50-0 +model_id: gemma-3-27b-it +category: valve +task: + input_text: set the irrigation valve to 50% + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 50 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 50 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: set the irrigation valve to 50% + context: + id: 01KHJQZ0K662R7YWZA3XYZ1ENR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:41.222914+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:41.224011+00:00 + - role: user + content: set the irrigation valve to 50% + attachments: null + created: 2026-02-16 08:09:41.222991+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:41.224025+00:00 + duration_ms: 39.688 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-set_the_irrigation_valve_to_50-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-set_the_irrigation_valve_to_50-1.yaml new file mode 100644 index 000000000..961a801ee --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-set_the_irrigation_valve_to_50-1.yaml @@ -0,0 +1,199 @@ +--- +uuid: 11d57df2-bec8-435e-a353-81716b92960e +task_id: home2_ru_water_valve-set_the_irrigation_valve_to_50-1 +model_id: gemma-3-27b-it +category: valve +task: + input_text: set the irrigation valve to 50% + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 50 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 50 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: set the irrigation valve to 50% + context: + id: 01KHJQZ0VRR564WPBQD4MAB78K + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:41.496299+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:41.498430+00:00 + - role: user + content: set the irrigation valve to 50% + attachments: null + created: 2026-02-16 08:09:41.496372+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:41.498444+00:00 + duration_ms: 39.584 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-set_the_irrigation_valve_to_50-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-set_the_irrigation_valve_to_50-2.yaml new file mode 100644 index 000000000..f71088478 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-set_the_irrigation_valve_to_50-2.yaml @@ -0,0 +1,199 @@ +--- +uuid: 27325647-4490-4194-96fb-73c121ea033e +task_id: home2_ru_water_valve-set_the_irrigation_valve_to_50-2 +model_id: gemma-3-27b-it +category: valve +task: + input_text: set the irrigation valve to 50% + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 50 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 50 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: set the irrigation valve to 50% + context: + id: 01KHJQZ148S9GGS1QN4SFHS4ZA + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:41.768744+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:41.772582+00:00 + - role: user + content: set the irrigation valve to 50% + attachments: null + created: 2026-02-16 08:09:41.768817+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:41.772596+00:00 + duration_ms: 40.375 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-set_the_irrigation_valve_to_50-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-set_the_irrigation_valve_to_50-3.yaml new file mode 100644 index 000000000..37a4d0b2f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-set_the_irrigation_valve_to_50-3.yaml @@ -0,0 +1,199 @@ +--- +uuid: 211317bb-555d-45af-9bf9-9049f51ece6b +task_id: home2_ru_water_valve-set_the_irrigation_valve_to_50-3 +model_id: gemma-3-27b-it +category: valve +task: + input_text: set the irrigation valve to 50% + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 50 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 50 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: set the irrigation valve to 50% + context: + id: 01KHJQZ1DKTXZ8RNMHEGD8YQCS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:42.067267+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:42.068828+00:00 + - role: user + content: set the irrigation valve to 50% + attachments: null + created: 2026-02-16 08:09:42.067342+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:42.068842+00:00 + duration_ms: 47.352 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-set_the_irrigation_valve_to_50-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-set_the_irrigation_valve_to_50-4.yaml new file mode 100644 index 000000000..f6015167a --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-set_the_irrigation_valve_to_50-4.yaml @@ -0,0 +1,199 @@ +--- +uuid: 980c50a3-3d86-4ed4-873d-c64c967458a8 +task_id: home2_ru_water_valve-set_the_irrigation_valve_to_50-4 +model_id: gemma-3-27b-it +category: valve +task: + input_text: set the irrigation valve to 50% + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 50 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 50 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: set the irrigation valve to 50% + context: + id: 01KHJQZ1PWX2AKBTZKYY4JG3G2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:42.364773+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:42.367618+00:00 + - role: user + content: set the irrigation valve to 50% + attachments: null + created: 2026-02-16 08:09:42.364847+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:42.367631+00:00 + duration_ms: 44.945 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_off_the_water_in_the_front_yard-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_off_the_water_in_the_front_yard-0.yaml new file mode 100644 index 000000000..1d69e9d9a --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_off_the_water_in_the_front_yard-0.yaml @@ -0,0 +1,199 @@ +--- +uuid: ccfbcc14-3eb6-4667-ab17-ff81ac17567c +task_id: home2_ru_water_valve-turn_off_the_water_in_the_front_yard-0 +model_id: gemma-3-27b-it +category: valve +task: + input_text: turn off the water in the front yard + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: turn off the water in the front yard + context: + id: 01KHJQYYXKX4F40PDE6F5DJ6KF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:39.507334+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:39.509308+00:00 + - role: user + content: turn off the water in the front yard + attachments: null + created: 2026-02-16 08:09:39.507407+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:39.509322+00:00 + duration_ms: 39.223 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_off_the_water_in_the_front_yard-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_off_the_water_in_the_front_yard-1.yaml new file mode 100644 index 000000000..27bdd0cca --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_off_the_water_in_the_front_yard-1.yaml @@ -0,0 +1,199 @@ +--- +uuid: 03016114-843b-4f02-afef-3ac0a21320d4 +task_id: home2_ru_water_valve-turn_off_the_water_in_the_front_yard-1 +model_id: gemma-3-27b-it +category: valve +task: + input_text: turn off the water in the front yard + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: turn off the water in the front yard + context: + id: 01KHJQYZ6CSBVDSAEYFC9T21F4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:39.788762+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:39.791704+00:00 + - role: user + content: turn off the water in the front yard + attachments: null + created: 2026-02-16 08:09:39.788836+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:39.791718+00:00 + duration_ms: 283.618 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_off_the_water_in_the_front_yard-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_off_the_water_in_the_front_yard-2.yaml new file mode 100644 index 000000000..34c391549 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_off_the_water_in_the_front_yard-2.yaml @@ -0,0 +1,199 @@ +--- +uuid: 2bef3aee-6307-4548-95eb-c7e434f128d5 +task_id: home2_ru_water_valve-turn_off_the_water_in_the_front_yard-2 +model_id: gemma-3-27b-it +category: valve +task: + input_text: turn off the water in the front yard + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: turn off the water in the front yard + context: + id: 01KHJQYZPTTRXVX7FWE773R9TA + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:40.314750+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:40.317657+00:00 + - role: user + content: turn off the water in the front yard + attachments: null + created: 2026-02-16 08:09:40.314824+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:40.317672+00:00 + duration_ms: 40.716 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_off_the_water_in_the_front_yard-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_off_the_water_in_the_front_yard-3.yaml new file mode 100644 index 000000000..8619d12a4 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_off_the_water_in_the_front_yard-3.yaml @@ -0,0 +1,199 @@ +--- +uuid: b0a8cad3-fb3c-4e0d-812c-a41bc1e295e9 +task_id: home2_ru_water_valve-turn_off_the_water_in_the_front_yard-3 +model_id: gemma-3-27b-it +category: valve +task: + input_text: turn off the water in the front yard + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: turn off the water in the front yard + context: + id: 01KHJQZ0089B9G9GTKGTV5EVNQ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:40.617130+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:40.622383+00:00 + - role: user + content: turn off the water in the front yard + attachments: null + created: 2026-02-16 08:09:40.617305+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:40.622422+00:00 + duration_ms: 47.306 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_off_the_water_in_the_front_yard-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_off_the_water_in_the_front_yard-4.yaml new file mode 100644 index 000000000..9406f0f21 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_off_the_water_in_the_front_yard-4.yaml @@ -0,0 +1,199 @@ +--- +uuid: 5b4f2b89-d9f8-434e-86d3-2916159c7f76 +task_id: home2_ru_water_valve-turn_off_the_water_in_the_front_yard-4 +model_id: gemma-3-27b-it +category: valve +task: + input_text: turn off the water in the front yard + expect_changes: + valve.irrigation_valve: + state: closed + attributes: + current_position: 0 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: closed + current_position: 0 + got: + state: open + current_position: 100 + conversation_trace: + - event_type: async_process + data: + text: turn off the water in the front yard + context: + id: 01KHJQZ0ARNE3G0876QKPE0CAS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:40.952939+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:40.954108+00:00 + - role: user + content: turn off the water in the front yard + attachments: null + created: 2026-02-16 08:09:40.953011+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:40.954120+00:00 + duration_ms: 37.74 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_on_the_water_in_the_front_yard-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_on_the_water_in_the_front_yard-0.yaml new file mode 100644 index 000000000..f8ac90d0a --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_on_the_water_in_the_front_yard-0.yaml @@ -0,0 +1,199 @@ +--- +uuid: 8bc41682-392d-48cb-aa24-00ff3740e31d +task_id: home2_ru_water_valve-turn_on_the_water_in_the_front_yard-0 +model_id: gemma-3-27b-it +category: valve +task: + input_text: turn on the water in the front yard + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: turn on the water in the front yard + context: + id: 01KHJQYPR0SQHZ024D275P4051 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:31.137008+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:31.138178+00:00 + - role: user + content: turn on the water in the front yard + attachments: null + created: 2026-02-16 08:09:31.137087+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:31.138190+00:00 + duration_ms: 140.185 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_on_the_water_in_the_front_yard-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_on_the_water_in_the_front_yard-1.yaml new file mode 100644 index 000000000..80960699d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_on_the_water_in_the_front_yard-1.yaml @@ -0,0 +1,199 @@ +--- +uuid: 64930d8b-3c81-4959-864d-6f801177f465 +task_id: home2_ru_water_valve-turn_on_the_water_in_the_front_yard-1 +model_id: gemma-3-27b-it +category: valve +task: + input_text: turn on the water in the front yard + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: turn on the water in the front yard + context: + id: 01KHJQYQ4Q19ZQRGKG52DDTZB5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:31.544021+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:31.546722+00:00 + - role: user + content: turn on the water in the front yard + attachments: null + created: 2026-02-16 08:09:31.544099+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:31.546736+00:00 + duration_ms: 46.053 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_on_the_water_in_the_front_yard-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_on_the_water_in_the_front_yard-2.yaml new file mode 100644 index 000000000..c0742fa71 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_on_the_water_in_the_front_yard-2.yaml @@ -0,0 +1,199 @@ +--- +uuid: f17df1d1-f1d3-473d-be46-db5789e1f261 +task_id: home2_ru_water_valve-turn_on_the_water_in_the_front_yard-2 +model_id: gemma-3-27b-it +category: valve +task: + input_text: turn on the water in the front yard + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: turn on the water in the front yard + context: + id: 01KHJQYQFFFPS1S15B7B2DAMN9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:31.887516+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:31.888638+00:00 + - role: user + content: turn on the water in the front yard + attachments: null + created: 2026-02-16 08:09:31.887590+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:31.888650+00:00 + duration_ms: 39.972 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_on_the_water_in_the_front_yard-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_on_the_water_in_the_front_yard-3.yaml new file mode 100644 index 000000000..61aa671ae --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_on_the_water_in_the_front_yard-3.yaml @@ -0,0 +1,199 @@ +--- +uuid: 8ee988b6-0ef6-49ee-8407-45c8f328aea1 +task_id: home2_ru_water_valve-turn_on_the_water_in_the_front_yard-3 +model_id: gemma-3-27b-it +category: valve +task: + input_text: turn on the water in the front yard + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: turn on the water in the front yard + context: + id: 01KHJQYQR282JZ221ENN5P53V1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:32.163051+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:32.164187+00:00 + - role: user + content: turn on the water in the front yard + attachments: null + created: 2026-02-16 08:09:32.163128+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:32.164198+00:00 + duration_ms: 122.384 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_on_the_water_in_the_front_yard-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_on_the_water_in_the_front_yard-4.yaml new file mode 100644 index 000000000..56eac7052 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-turn_on_the_water_in_the_front_yard-4.yaml @@ -0,0 +1,199 @@ +--- +uuid: 6e3625ac-5ea8-45ed-94e9-043a7bbd70d7 +task_id: home2_ru_water_valve-turn_on_the_water_in_the_front_yard-4 +model_id: gemma-3-27b-it +category: valve +task: + input_text: turn on the water in the front yard + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: turn on the water in the front yard + context: + id: 01KHJQYR3KWDY0B09KCGZA4J8H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:32.531546+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:32.533313+00:00 + - role: user + content: turn on the water in the front yard + attachments: null + created: 2026-02-16 08:09:32.531621+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:32.533325+00:00 + duration_ms: 147.532 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_front_yard-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_front_yard-0.yaml new file mode 100644 index 000000000..ac5ab054e --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_front_yard-0.yaml @@ -0,0 +1,199 @@ +--- +uuid: 3fbb0a90-bae6-43ab-bab4-026066866b4b +task_id: home2_ru_water_valve-water_the_front_yard-0 +model_id: gemma-3-27b-it +category: valve +task: + input_text: water the front yard + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: water the front yard + context: + id: 01KHJQYRG3F8FN6SPV8758A2DK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:32.931363+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:32.932920+00:00 + - role: user + content: water the front yard + attachments: null + created: 2026-02-16 08:09:32.931438+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:32.932932+00:00 + duration_ms: 37.464 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_front_yard-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_front_yard-1.yaml new file mode 100644 index 000000000..48d9dac2c --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_front_yard-1.yaml @@ -0,0 +1,199 @@ +--- +uuid: a8cfc5c8-585c-4a8a-bfe8-440040df1aa5 +task_id: home2_ru_water_valve-water_the_front_yard-1 +model_id: gemma-3-27b-it +category: valve +task: + input_text: water the front yard + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: water the front yard + context: + id: 01KHJQYRRY2696CKZ6BGENNRED + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:33.214611+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:33.219788+00:00 + - role: user + content: water the front yard + attachments: null + created: 2026-02-16 08:09:33.214685+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:33.219802+00:00 + duration_ms: 140.155 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_front_yard-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_front_yard-2.yaml new file mode 100644 index 000000000..0add1b482 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_front_yard-2.yaml @@ -0,0 +1,199 @@ +--- +uuid: 7088e50e-c4ec-4be6-b5bf-09cef02969fe +task_id: home2_ru_water_valve-water_the_front_yard-2 +model_id: gemma-3-27b-it +category: valve +task: + input_text: water the front yard + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: water the front yard + context: + id: 01KHJQYS53ETE6TBY43AXD2Z8C + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:33.603296+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:33.604458+00:00 + - role: user + content: water the front yard + attachments: null + created: 2026-02-16 08:09:33.603375+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:33.604471+00:00 + duration_ms: 127.288 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_front_yard-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_front_yard-3.yaml new file mode 100644 index 000000000..78745970c --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_front_yard-3.yaml @@ -0,0 +1,199 @@ +--- +uuid: 4dac2c63-7e21-4800-ac0b-066bb4e4608a +task_id: home2_ru_water_valve-water_the_front_yard-3 +model_id: gemma-3-27b-it +category: valve +task: + input_text: water the front yard + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: water the front yard + context: + id: 01KHJQYSGPN7B5E9FH9PP1KERA + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:33.974149+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:33.975251+00:00 + - role: user + content: water the front yard + attachments: null + created: 2026-02-16 08:09:33.974223+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:33.975264+00:00 + duration_ms: 146.691 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_front_yard-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_front_yard-4.yaml new file mode 100644 index 000000000..45139d676 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_front_yard-4.yaml @@ -0,0 +1,199 @@ +--- +uuid: db4ac42d-5357-4c28-a327-1617f4c80099 +task_id: home2_ru_water_valve-water_the_front_yard-4 +model_id: gemma-3-27b-it +category: valve +task: + input_text: water the front yard + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: water the front yard + context: + id: 01KHJQYSWZA7X0QJWB6DB8YX0G + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:34.367441+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:34.370331+00:00 + - role: user + content: water the front yard + attachments: null + created: 2026-02-16 08:09:34.367514+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:34.370345+00:00 + duration_ms: 121.585 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_garden-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_garden-0.yaml new file mode 100644 index 000000000..c659be890 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_garden-0.yaml @@ -0,0 +1,199 @@ +--- +uuid: 99ab132a-c0ec-4116-963a-58e44a889db0 +task_id: home2_ru_water_valve-water_the_garden-0 +model_id: gemma-3-27b-it +category: valve +task: + input_text: water the garden + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: water the garden + context: + id: 01KHJQYT9N8C2PVV6QEJ7AAMHE + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:34.773157+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:34.776362+00:00 + - role: user + content: water the garden + attachments: null + created: 2026-02-16 08:09:34.773254+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:34.776377+00:00 + duration_ms: 40.03 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_garden-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_garden-1.yaml new file mode 100644 index 000000000..98af4f2e5 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_garden-1.yaml @@ -0,0 +1,199 @@ +--- +uuid: b74638f0-16a3-4c92-a293-491e42882a99 +task_id: home2_ru_water_valve-water_the_garden-1 +model_id: gemma-3-27b-it +category: valve +task: + input_text: water the garden + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: water the garden + context: + id: 01KHJQYTK5MZ1V99YB36B23NT3 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:35.077554+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:35.078674+00:00 + - role: user + content: water the garden + attachments: null + created: 2026-02-16 08:09:35.077630+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:35.078686+00:00 + duration_ms: 126.197 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_garden-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_garden-2.yaml new file mode 100644 index 000000000..1bf9addcf --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_garden-2.yaml @@ -0,0 +1,199 @@ +--- +uuid: b229ab37-f883-48f5-a341-bb6eac6e1a93 +task_id: home2_ru_water_valve-water_the_garden-2 +model_id: gemma-3-27b-it +category: valve +task: + input_text: water the garden + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: water the garden + context: + id: 01KHJQYTY1ER2B338JY9GGQQGX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:35.425335+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:35.427864+00:00 + - role: user + content: water the garden + attachments: null + created: 2026-02-16 08:09:35.425413+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:35.427878+00:00 + duration_ms: 39.088 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_garden-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_garden-3.yaml new file mode 100644 index 000000000..9ec488c62 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_garden-3.yaml @@ -0,0 +1,199 @@ +--- +uuid: 4cde2161-5902-45e4-9388-75775643632d +task_id: home2_ru_water_valve-water_the_garden-3 +model_id: gemma-3-27b-it +category: valve +task: + input_text: water the garden + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: water the garden + context: + id: 01KHJQYV73GF5W72R8W37SBXE3 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:35.715128+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:35.716294+00:00 + - role: user + content: water the garden + attachments: null + created: 2026-02-16 08:09:35.715204+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:35.716306+00:00 + duration_ms: 37.857 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_garden-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_garden-4.yaml new file mode 100644 index 000000000..ce2d6b70c --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home2_ru_water_valve-water_the_garden-4.yaml @@ -0,0 +1,199 @@ +--- +uuid: 00fe263c-a21e-40de-93ca-e9a357206181 +task_id: home2_ru_water_valve-water_the_garden-4 +model_id: gemma-3-27b-it +category: valve +task: + input_text: water the garden + expect_changes: + valve.irrigation_valve: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + valve.irrigation_valve: + expected: + state: open + current_position: 100 + got: + state: closed + current_position: 0 + conversation_trace: + - event_type: async_process + data: + text: water the garden + context: + id: 01KHJQYVG1ZA466J2Y0BMT6GTH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:36.001185+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Camera Motion + domain: binary_sensor + areas: Garage + - names: Garage Camera Person + domain: binary_sensor + areas: Garage + - names: Garage Camera Sound + domain: binary_sensor + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garden Light + domain: light + areas: Front yard + - names: Irrigation Valve + domain: valve + areas: Front yard + - names: Irrigation Valve Battery + domain: sensor + areas: Front yard + - names: Irrigation Valve Meter-reading + domain: sensor + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Thermostat + domain: climate + areas: Kitchen + - names: Thermostat Humidity + domain: sensor + areas: Kitchen + - names: Thermostat Temperature + domain: sensor + areas: Kitchen + created: 2026-02-16 08:09:36.002294+00:00 + - role: user + content: water the garden + attachments: null + created: 2026-02-16 08:09:36.001284+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:36.002305+00:00 + duration_ms: 39.31 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_bedroom_fan-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_bedroom_fan-0.yaml new file mode 100644 index 000000000..166be4110 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_bedroom_fan-0.yaml @@ -0,0 +1,153 @@ +--- +uuid: 6e2b8673-b722-41ac-afe5-7a09aeb97d33 +task_id: home5_cn_fan-turn_off_the_bedroom_fan-0 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'off' + percentage: 0 + got: + state: 'on' + percentage: 100 + conversation_trace: + - event_type: async_process + data: + text: Turn off the bedroom fan + context: + id: 01KHJQZ84T1WV99TJDBKRPFWSH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:48.954843+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:48.955702+00:00 + - role: user + content: Turn off the bedroom fan + attachments: null + created: 2026-02-16 08:09:48.954919+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:48.955713+00:00 + duration_ms: 43.419 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_bedroom_fan-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_bedroom_fan-1.yaml new file mode 100644 index 000000000..f9ff7ddb8 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_bedroom_fan-1.yaml @@ -0,0 +1,153 @@ +--- +uuid: 2f378553-b6b0-49d8-b004-6eeb2663c2f5 +task_id: home5_cn_fan-turn_off_the_bedroom_fan-1 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'off' + percentage: 0 + got: + state: 'on' + percentage: 100 + conversation_trace: + - event_type: async_process + data: + text: Turn off the bedroom fan + context: + id: 01KHJQZ8DHQ17VEVGGFAV5WAGE + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:49.233869+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:49.234694+00:00 + - role: user + content: Turn off the bedroom fan + attachments: null + created: 2026-02-16 08:09:49.233939+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:49.234706+00:00 + duration_ms: 40.864 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_bedroom_fan-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_bedroom_fan-2.yaml new file mode 100644 index 000000000..a6c976cb1 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_bedroom_fan-2.yaml @@ -0,0 +1,153 @@ +--- +uuid: 0bd872ea-1b74-4ad1-9572-0ef35737b9b6 +task_id: home5_cn_fan-turn_off_the_bedroom_fan-2 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'off' + percentage: 0 + got: + state: 'on' + percentage: 100 + conversation_trace: + - event_type: async_process + data: + text: Turn off the bedroom fan + context: + id: 01KHJQZ8PPBQM6S1FSSGBNEGR2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:49.526346+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:49.527140+00:00 + - role: user + content: Turn off the bedroom fan + attachments: null + created: 2026-02-16 08:09:49.526425+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:49.527152+00:00 + duration_ms: 125.88 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_bedroom_fan-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_bedroom_fan-3.yaml new file mode 100644 index 000000000..b342ef0d6 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_bedroom_fan-3.yaml @@ -0,0 +1,153 @@ +--- +uuid: 498c59cf-43b4-461c-9aaa-8e5ac7eae6dd +task_id: home5_cn_fan-turn_off_the_bedroom_fan-3 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'off' + percentage: 0 + got: + state: 'on' + percentage: 100 + conversation_trace: + - event_type: async_process + data: + text: Turn off the bedroom fan + context: + id: 01KHJQZ91MJMEEFZQ2VHQZZ0QG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:49.876140+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:49.877011+00:00 + - role: user + content: Turn off the bedroom fan + attachments: null + created: 2026-02-16 08:09:49.876213+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:49.877022+00:00 + duration_ms: 48.145 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_bedroom_fan-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_bedroom_fan-4.yaml new file mode 100644 index 000000000..924908d76 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_bedroom_fan-4.yaml @@ -0,0 +1,153 @@ +--- +uuid: 6a739843-6e50-43c5-a205-fdab4a3a8954 +task_id: home5_cn_fan-turn_off_the_bedroom_fan-4 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'off' + percentage: 0 + got: + state: 'on' + percentage: 100 + conversation_trace: + - event_type: async_process + data: + text: Turn off the bedroom fan + context: + id: 01KHJQZ9A9M2BTYV1ZB5HNTJRC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:50.153316+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:50.154153+00:00 + - role: user + content: Turn off the bedroom fan + attachments: null + created: 2026-02-16 08:09:50.153391+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:50.154163+00:00 + duration_ms: 40.347 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_fan-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_fan-0.yaml new file mode 100644 index 000000000..b96da2577 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_fan-0.yaml @@ -0,0 +1,153 @@ +--- +uuid: 1bd5ef00-7f54-4411-8ae7-520a730bbbc7 +task_id: home5_cn_fan-turn_off_the_fan-0 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'off' + percentage: 0 + got: + state: 'on' + percentage: 100 + conversation_trace: + - event_type: async_process + data: + text: Turn off the fan + context: + id: 01KHJQZ9JQYW965FA8ZBH0EQ7K + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:50.423298+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:50.424148+00:00 + - role: user + content: Turn off the fan + attachments: null + created: 2026-02-16 08:09:50.423375+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:50.424158+00:00 + duration_ms: 44.169 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_fan-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_fan-1.yaml new file mode 100644 index 000000000..9e7054d34 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_fan-1.yaml @@ -0,0 +1,153 @@ +--- +uuid: 14342bc5-dfe3-47a8-86a2-c815e3d25cde +task_id: home5_cn_fan-turn_off_the_fan-1 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'off' + percentage: 0 + got: + state: 'on' + percentage: 100 + conversation_trace: + - event_type: async_process + data: + text: Turn off the fan + context: + id: 01KHJQZ9V2ESTKR8A1DY8J8X4N + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:50.690935+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:50.691822+00:00 + - role: user + content: Turn off the fan + attachments: null + created: 2026-02-16 08:09:50.691011+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:50.691836+00:00 + duration_ms: 40.071 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_fan-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_fan-2.yaml new file mode 100644 index 000000000..39827ac8b --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_fan-2.yaml @@ -0,0 +1,153 @@ +--- +uuid: 4a79e506-ab00-4680-ab5f-18b8615d4a5d +task_id: home5_cn_fan-turn_off_the_fan-2 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'off' + percentage: 0 + got: + state: 'on' + percentage: 100 + conversation_trace: + - event_type: async_process + data: + text: Turn off the fan + context: + id: 01KHJQZA2XATA88RHP54XMPZ0B + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:50.942046+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:50.942904+00:00 + - role: user + content: Turn off the fan + attachments: null + created: 2026-02-16 08:09:50.942119+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:50.942915+00:00 + duration_ms: 36.935 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_fan-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_fan-3.yaml new file mode 100644 index 000000000..640dd5196 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_fan-3.yaml @@ -0,0 +1,153 @@ +--- +uuid: e6e64a0f-e2a7-4724-ad0b-787d66656ad9 +task_id: home5_cn_fan-turn_off_the_fan-3 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'off' + percentage: 0 + got: + state: 'on' + percentage: 100 + conversation_trace: + - event_type: async_process + data: + text: Turn off the fan + context: + id: 01KHJQZADW6H5HR61725DGRYQZ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:51.292372+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:51.293221+00:00 + - role: user + content: Turn off the fan + attachments: null + created: 2026-02-16 08:09:51.292445+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:51.293248+00:00 + duration_ms: 136.752 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_fan-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_fan-4.yaml new file mode 100644 index 000000000..91711b6ab --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_off_the_fan-4.yaml @@ -0,0 +1,153 @@ +--- +uuid: 4a9284ab-7073-4883-bca3-afe168ef32b2 +task_id: home5_cn_fan-turn_off_the_fan-4 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn off the fan + expect_changes: + fan.bedroom_fan: + state: 'off' + attributes: + percentage: 0 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'off' + percentage: 0 + got: + state: 'on' + percentage: 100 + conversation_trace: + - event_type: async_process + data: + text: Turn off the fan + context: + id: 01KHJQZARWG89E71F12Y7JJ3CT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:51.644612+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:51.645425+00:00 + - role: user + content: Turn off the fan + attachments: null + created: 2026-02-16 08:09:51.644684+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:51.645436+00:00 + duration_ms: 41.14 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_1_fan-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_1_fan-0.yaml new file mode 100644 index 000000000..f6af9ecad --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_1_fan-0.yaml @@ -0,0 +1,153 @@ +--- +uuid: 53136f8a-9b34-422a-a3fa-539f196af193 +task_id: home5_cn_fan-turn_on_the_bedroom_1_fan-0 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom 1 fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom 1 fan + context: + id: 01KHJQZ1ZPMT9VV3AACEJ0K71P + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:42.646757+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:42.647647+00:00 + - role: user + content: Turn on the bedroom 1 fan + attachments: null + created: 2026-02-16 08:09:42.646828+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:42.647659+00:00 + duration_ms: 144.162 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_1_fan-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_1_fan-1.yaml new file mode 100644 index 000000000..499d1f6ab --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_1_fan-1.yaml @@ -0,0 +1,153 @@ +--- +uuid: 7eb100ff-2469-425d-8b42-7d825fd5452b +task_id: home5_cn_fan-turn_on_the_bedroom_1_fan-1 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom 1 fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom 1 fan + context: + id: 01KHJQZ2BKE482P9AEWEDDV1MA + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:43.027559+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:43.028442+00:00 + - role: user + content: Turn on the bedroom 1 fan + attachments: null + created: 2026-02-16 08:09:43.027636+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:43.028454+00:00 + duration_ms: 38.689 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_1_fan-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_1_fan-2.yaml new file mode 100644 index 000000000..f10edf88c --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_1_fan-2.yaml @@ -0,0 +1,153 @@ +--- +uuid: 86944a0a-fa1d-4bf2-8045-64456b482c7a +task_id: home5_cn_fan-turn_on_the_bedroom_1_fan-2 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom 1 fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom 1 fan + context: + id: 01KHJQZ2KM86T56CCENXH0KQQD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:43.284867+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:43.285841+00:00 + - role: user + content: Turn on the bedroom 1 fan + attachments: null + created: 2026-02-16 08:09:43.284957+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:43.285851+00:00 + duration_ms: 39.98 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_1_fan-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_1_fan-3.yaml new file mode 100644 index 000000000..4f58c145f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_1_fan-3.yaml @@ -0,0 +1,153 @@ +--- +uuid: 24c8cbf9-74a5-4f85-b9f2-a8a362951073 +task_id: home5_cn_fan-turn_on_the_bedroom_1_fan-3 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom 1 fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom 1 fan + context: + id: 01KHJQZ2XY5P31QK5MPHEG92F8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:43.614691+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:43.615553+00:00 + - role: user + content: Turn on the bedroom 1 fan + attachments: null + created: 2026-02-16 08:09:43.614764+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:43.615564+00:00 + duration_ms: 42.831 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_1_fan-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_1_fan-4.yaml new file mode 100644 index 000000000..f52aa03cc --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_1_fan-4.yaml @@ -0,0 +1,153 @@ +--- +uuid: 78841823-6516-4286-a0bd-0e16215b6ffd +task_id: home5_cn_fan-turn_on_the_bedroom_1_fan-4 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom 1 fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom 1 fan + context: + id: 01KHJQZ36MR1YFJ64F75WVK96D + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:43.892938+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:43.893956+00:00 + - role: user + content: Turn on the bedroom 1 fan + attachments: null + created: 2026-02-16 08:09:43.893019+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:43.893969+00:00 + duration_ms: 127.018 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_fan-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_fan-0.yaml new file mode 100644 index 000000000..4d7cbff76 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_fan-0.yaml @@ -0,0 +1,153 @@ +--- +uuid: 02eb00b1-a6e8-40d0-be19-2f8c97c4be09 +task_id: home5_cn_fan-turn_on_the_bedroom_fan-0 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom fan + context: + id: 01KHJQZ3HF1AVX4DT3AX64B3VH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:44.239581+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:44.240416+00:00 + - role: user + content: Turn on the bedroom fan + attachments: null + created: 2026-02-16 08:09:44.239654+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:44.240427+00:00 + duration_ms: 46.522 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_fan-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_fan-1.yaml new file mode 100644 index 000000000..0ca94957a --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_fan-1.yaml @@ -0,0 +1,153 @@ +--- +uuid: 6e259a46-1764-4ed2-82bc-3f99b9965b90 +task_id: home5_cn_fan-turn_on_the_bedroom_fan-1 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom fan + context: + id: 01KHJQZ3T00MS8TDKVK8EMBQ33 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:44.512614+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:44.513443+00:00 + - role: user + content: Turn on the bedroom fan + attachments: null + created: 2026-02-16 08:09:44.512688+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:44.513456+00:00 + duration_ms: 131.969 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_fan-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_fan-2.yaml new file mode 100644 index 000000000..725e25e7c --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_fan-2.yaml @@ -0,0 +1,153 @@ +--- +uuid: 91d87cf5-bfea-4e21-9998-19048afd728f +task_id: home5_cn_fan-turn_on_the_bedroom_fan-2 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom fan + context: + id: 01KHJQZ45D5ASZE8T2ZAV5XMQE + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:44.877785+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:44.878675+00:00 + - role: user + content: Turn on the bedroom fan + attachments: null + created: 2026-02-16 08:09:44.877858+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:44.878687+00:00 + duration_ms: 125.392 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_fan-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_fan-3.yaml new file mode 100644 index 000000000..996a67173 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_fan-3.yaml @@ -0,0 +1,153 @@ +--- +uuid: 2377c926-af55-4749-95dd-cfc31b0e7995 +task_id: home5_cn_fan-turn_on_the_bedroom_fan-3 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom fan + context: + id: 01KHJQZ4FXVJBC6DER2PG8XCE4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:45.213361+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:45.214187+00:00 + - role: user + content: Turn on the bedroom fan + attachments: null + created: 2026-02-16 08:09:45.213434+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:45.214197+00:00 + duration_ms: 37.335 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_fan-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_fan-4.yaml new file mode 100644 index 000000000..676d4e0d1 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_bedroom_fan-4.yaml @@ -0,0 +1,153 @@ +--- +uuid: 78f22f4b-80fa-454b-a53b-ea3eadbe419b +task_id: home5_cn_fan-turn_on_the_bedroom_fan-4 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the bedroom fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the bedroom fan + context: + id: 01KHJQZ4RAXV1980EWY2A1F2FV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:45.482580+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:45.483445+00:00 + - role: user + content: Turn on the bedroom fan + attachments: null + created: 2026-02-16 08:09:45.482655+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:45.483456+00:00 + duration_ms: 121.687 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan-0.yaml new file mode 100644 index 000000000..34efbea6f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan-0.yaml @@ -0,0 +1,153 @@ +--- +uuid: a8c5e1c3-b9dd-44ff-bc83-82a75d473571 +task_id: home5_cn_fan-turn_on_the_fan-0 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the fan + context: + id: 01KHJQZ6NA3QCFSXZA6B1DXQVX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:47.434250+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:47.435090+00:00 + - role: user + content: Turn on the fan + attachments: null + created: 2026-02-16 08:09:47.434327+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:47.435100+00:00 + duration_ms: 40.854 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan-1.yaml new file mode 100644 index 000000000..fc4832cbd --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan-1.yaml @@ -0,0 +1,153 @@ +--- +uuid: b4018203-f824-42d4-be2c-0cd151583f37 +task_id: home5_cn_fan-turn_on_the_fan-1 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the fan + context: + id: 01KHJQZ6XVP8CA75G0PCPDWERN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:47.707091+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:47.707938+00:00 + - role: user + content: Turn on the fan + attachments: null + created: 2026-02-16 08:09:47.707164+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:47.707949+00:00 + duration_ms: 135.708 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan-2.yaml new file mode 100644 index 000000000..fc860310b --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan-2.yaml @@ -0,0 +1,153 @@ +--- +uuid: 5f87091a-bb82-4275-9e91-b7796123a943 +task_id: home5_cn_fan-turn_on_the_fan-2 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the fan + context: + id: 01KHJQZ7AFW5QX9M3M1EW12BCB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:48.111809+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:48.112690+00:00 + - role: user + content: Turn on the fan + attachments: null + created: 2026-02-16 08:09:48.111883+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:48.112703+00:00 + duration_ms: 122.318 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan-3.yaml new file mode 100644 index 000000000..0b6eb8f03 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan-3.yaml @@ -0,0 +1,153 @@ +--- +uuid: 5b39d4b5-7b16-4c81-bb01-3ab3768e9cdd +task_id: home5_cn_fan-turn_on_the_fan-3 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the fan + context: + id: 01KHJQZ7MY34XTXBTZ0E98CQ8Y + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:48.446690+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:48.447520+00:00 + - role: user + content: Turn on the fan + attachments: null + created: 2026-02-16 08:09:48.446763+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:48.447532+00:00 + duration_ms: 44.71 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan-4.yaml new file mode 100644 index 000000000..cdde64aab --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan-4.yaml @@ -0,0 +1,153 @@ +--- +uuid: 9600de2d-f71b-47cb-9779-bcb32eb9f6c1 +task_id: home5_cn_fan-turn_on_the_fan-4 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the fan + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the fan + context: + id: 01KHJQZ7WZ3CF2EQZKVH6HFTQ6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:48.704075+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:48.704900+00:00 + - role: user + content: Turn on the fan + attachments: null + created: 2026-02-16 08:09:48.704151+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:48.704912+00:00 + duration_ms: 39.276 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan_in_the_bedroom-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan_in_the_bedroom-0.yaml new file mode 100644 index 000000000..62ee60999 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan_in_the_bedroom-0.yaml @@ -0,0 +1,153 @@ +--- +uuid: 6e45735f-7cc4-4e62-9ecb-b899e045b540 +task_id: home5_cn_fan-turn_on_the_fan_in_the_bedroom-0 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the fan in the bedroom + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the fan in the bedroom + context: + id: 01KHJQZ53VDQWG5YN6MC37M852 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:45.851706+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:45.852522+00:00 + - role: user + content: Turn on the fan in the bedroom + attachments: null + created: 2026-02-16 08:09:45.851779+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:45.852533+00:00 + duration_ms: 183.29 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan_in_the_bedroom-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan_in_the_bedroom-1.yaml new file mode 100644 index 000000000..750b6062c --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan_in_the_bedroom-1.yaml @@ -0,0 +1,153 @@ +--- +uuid: 78390d28-67b5-46a3-ac15-8c7021d3f024 +task_id: home5_cn_fan-turn_on_the_fan_in_the_bedroom-1 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the fan in the bedroom + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the fan in the bedroom + context: + id: 01KHJQZ5G98ACFMKDMABCTSYV4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:46.249982+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:46.251175+00:00 + - role: user + content: Turn on the fan in the bedroom + attachments: null + created: 2026-02-16 08:09:46.250060+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:46.251189+00:00 + duration_ms: 41.516 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan_in_the_bedroom-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan_in_the_bedroom-2.yaml new file mode 100644 index 000000000..38bb33ab0 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan_in_the_bedroom-2.yaml @@ -0,0 +1,153 @@ +--- +uuid: abaf73bf-ad78-479b-85c1-cb184d48e940 +task_id: home5_cn_fan-turn_on_the_fan_in_the_bedroom-2 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the fan in the bedroom + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the fan in the bedroom + context: + id: 01KHJQZ5SBGPA36D56GGNSW4TQ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:46.539997+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:46.540836+00:00 + - role: user + content: Turn on the fan in the bedroom + attachments: null + created: 2026-02-16 08:09:46.540070+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:46.540848+00:00 + duration_ms: 126.737 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan_in_the_bedroom-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan_in_the_bedroom-3.yaml new file mode 100644 index 000000000..04d8599ee --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan_in_the_bedroom-3.yaml @@ -0,0 +1,153 @@ +--- +uuid: ac10bcb0-6929-44ad-84c5-70df6648ff5b +task_id: home5_cn_fan-turn_on_the_fan_in_the_bedroom-3 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the fan in the bedroom + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the fan in the bedroom + context: + id: 01KHJQZ64WPK2T33B1W6SVGSEX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:46.908759+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:46.909628+00:00 + - role: user + content: Turn on the fan in the bedroom + attachments: null + created: 2026-02-16 08:09:46.908835+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:46.909639+00:00 + duration_ms: 38.042 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan_in_the_bedroom-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan_in_the_bedroom-4.yaml new file mode 100644 index 000000000..1891f1976 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home5_cn_fan-turn_on_the_fan_in_the_bedroom-4.yaml @@ -0,0 +1,153 @@ +--- +uuid: 2e70b1af-cb2c-4bcc-91f3-69ccc6ba6b13 +task_id: home5_cn_fan-turn_on_the_fan_in_the_bedroom-4 +model_id: gemma-3-27b-it +category: fan +task: + input_text: Turn on the fan in the bedroom + expect_changes: + fan.bedroom_fan: + state: 'on' + attributes: + percentage: 100 +response: Error talking to API +context: + unexpected_states: + fan.bedroom_fan: + expected: + state: 'on' + percentage: 100 + got: + state: 'off' + percentage: 0 + conversation_trace: + - event_type: async_process + data: + text: Turn on the fan in the bedroom + context: + id: 01KHJQZ6CWSP55MTYQG4MTX9K6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:47.164994+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom Fan + domain: fan + areas: Bedroom 1 + - names: Kitchenette Light + domain: light + areas: Kitchenette + - names: Living Room Light + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: binary_sensor + areas: Front Porch + - names: Motion Sensor Battery + domain: sensor + areas: Front Porch + - names: Porch Light + domain: light + areas: Front Porch + - names: Wood Stove Temperature Sensor Humidity + domain: sensor + areas: Living Room + - names: Wood Stove Temperature Sensor Temperature + domain: sensor + areas: Living Room + created: 2026-02-16 08:09:47.165864+00:00 + - role: user + content: Turn on the fan in the bedroom + attachments: null + created: 2026-02-16 08:09:47.165067+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:47.165877+00:00 + duration_ms: 44.142 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_curtains-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_curtains-0.yaml new file mode 100644 index 000000000..0d93c4436 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_curtains-0.yaml @@ -0,0 +1,261 @@ +--- +uuid: d7f787bb-35f1-49b8-918e-1bdeedeee85f +task_id: home7_dk_cover_curtain-close_the_curtains-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the curtains + context: + id: 01KHJQZNNRV5DMDWQQQWMHQXNF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:02.809028+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:02.815521+00:00 + - role: user + content: Close the curtains + attachments: null + created: 2026-02-16 08:10:02.809101+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e3b0bf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:02.815536+00:00 + duration_ms: 131.666 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_curtains-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_curtains-1.yaml new file mode 100644 index 000000000..8b5224972 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_curtains-1.yaml @@ -0,0 +1,261 @@ +--- +uuid: 99d53e0c-e998-495f-97af-c54622c7dc6f +task_id: home7_dk_cover_curtain-close_the_curtains-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the curtains + context: + id: 01KHJQZP1DKEK801420WRMF5TF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:03.181656+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:03.183278+00:00 + - role: user + content: Close the curtains + attachments: null + created: 2026-02-16 08:10:03.181729+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16eecdd20>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:03.183292+00:00 + duration_ms: 144.917 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_curtains-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_curtains-2.yaml new file mode 100644 index 000000000..c3e0c541e --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_curtains-2.yaml @@ -0,0 +1,261 @@ +--- +uuid: 8907bc79-1d45-4958-b0c9-b3ada1a7e2a8 +task_id: home7_dk_cover_curtain-close_the_curtains-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the curtains + context: + id: 01KHJQZPDSSC13RCVNS3PHTZPE + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:03.577467+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:03.579577+00:00 + - role: user + content: Close the curtains + attachments: null + created: 2026-02-16 08:10:03.577541+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f92f5e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:03.579591+00:00 + duration_ms: 42.218 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_curtains-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_curtains-3.yaml new file mode 100644 index 000000000..93f62d671 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_curtains-3.yaml @@ -0,0 +1,261 @@ +--- +uuid: 3ec01c18-7e69-4451-90e1-c405209d329b +task_id: home7_dk_cover_curtain-close_the_curtains-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the curtains + context: + id: 01KHJQZPQBPZRTX3E2XJDMSZ8W + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:03.883903+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:03.885590+00:00 + - role: user + content: Close the curtains + attachments: null + created: 2026-02-16 08:10:03.883976+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dded850>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:03.885603+00:00 + duration_ms: 124.015 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_curtains-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_curtains-4.yaml new file mode 100644 index 000000000..4b7026cc7 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_curtains-4.yaml @@ -0,0 +1,261 @@ +--- +uuid: d54377f0-0da1-4746-a526-8aefcafb1665 +task_id: home7_dk_cover_curtain-close_the_curtains-4 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the curtains + context: + id: 01KHJQZQ3T51TNFS7KB10PSC1Q + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:04.282684+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:04.290384+00:00 + - role: user + content: Close the curtains + attachments: null + created: 2026-02-16 08:10:04.282761+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dc25e80>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:04.290401+00:00 + duration_ms: 53.312 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_living_room_curtain-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_living_room_curtain-0.yaml new file mode 100644 index 000000000..b098ed878 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_living_room_curtain-0.yaml @@ -0,0 +1,261 @@ +--- +uuid: b558dbfa-b62f-42da-b639-6d2146dfd1a5 +task_id: home7_dk_cover_curtain-close_the_living_room_curtain-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the living room curtain + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the living room curtain + context: + id: 01KHJQZM54QQV7DYDCQY1C5BXF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:01.252993+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:01.254692+00:00 + - role: user + content: Close the living room curtain + attachments: null + created: 2026-02-16 08:10:01.253067+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ffd8ca0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:01.254703+00:00 + duration_ms: 46.404 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_living_room_curtain-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_living_room_curtain-1.yaml new file mode 100644 index 000000000..b4d2b7476 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_living_room_curtain-1.yaml @@ -0,0 +1,261 @@ +--- +uuid: 9041b817-1245-4d97-88a8-67c44697d764 +task_id: home7_dk_cover_curtain-close_the_living_room_curtain-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the living room curtain + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the living room curtain + context: + id: 01KHJQZMFAWEPKT0634RG4Q23D + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:01.578824+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:01.581741+00:00 + - role: user + content: Close the living room curtain + attachments: null + created: 2026-02-16 08:10:01.578896+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f51ada0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:01.581755+00:00 + duration_ms: 49.766 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_living_room_curtain-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_living_room_curtain-2.yaml new file mode 100644 index 000000000..b63a92c3f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_living_room_curtain-2.yaml @@ -0,0 +1,261 @@ +--- +uuid: 59e1a985-0399-43ad-9c05-38d662878434 +task_id: home7_dk_cover_curtain-close_the_living_room_curtain-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the living room curtain + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the living room curtain + context: + id: 01KHJQZMTKT6YE3ZXQ75RRPP28 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:01.939254+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:01.940888+00:00 + - role: user + content: Close the living room curtain + attachments: null + created: 2026-02-16 08:10:01.939329+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d12d850>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:01.940901+00:00 + duration_ms: 40.457 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_living_room_curtain-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_living_room_curtain-3.yaml new file mode 100644 index 000000000..89e81af3f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_living_room_curtain-3.yaml @@ -0,0 +1,261 @@ +--- +uuid: 04f8ce13-8645-48ba-8d9d-66f5e587ca9d +task_id: home7_dk_cover_curtain-close_the_living_room_curtain-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the living room curtain + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the living room curtain + context: + id: 01KHJQZN3Q8XW52Y9RR6PPSY85 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:02.231906+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:02.233523+00:00 + - role: user + content: Close the living room curtain + attachments: null + created: 2026-02-16 08:10:02.231981+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f02c460>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:02.233536+00:00 + duration_ms: 47.122 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_living_room_curtain-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_living_room_curtain-4.yaml new file mode 100644 index 000000000..77e6edd95 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_living_room_curtain-4.yaml @@ -0,0 +1,261 @@ +--- +uuid: be908627-d592-4888-ae99-064e79e13179 +task_id: home7_dk_cover_curtain-close_the_living_room_curtain-4 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the living room curtain + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the living room curtain + context: + id: 01KHJQZNCR1K8YBZQRBGPPC6V1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:02.520940+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:02.522838+00:00 + - role: user + content: Close the living room curtain + attachments: null + created: 2026-02-16 08:10:02.521012+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e138ca0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:02.522851+00:00 + duration_ms: 40.912 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_smart_curtains-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_smart_curtains-0.yaml new file mode 100644 index 000000000..b20749e4e --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_smart_curtains-0.yaml @@ -0,0 +1,261 @@ +--- +uuid: f9d8d53c-738e-484e-b7ab-ef434649c4ad +task_id: home7_dk_cover_curtain-close_the_smart_curtains-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the smart curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the smart curtains + context: + id: 01KHJQZJ8J1728WCTE4J4G3PYE + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:59.314706+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:09:59.317511+00:00 + - role: user + content: Close the smart curtains + attachments: null + created: 2026-02-16 08:09:59.314779+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f04bb60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:59.317525+00:00 + duration_ms: 137.494 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_smart_curtains-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_smart_curtains-1.yaml new file mode 100644 index 000000000..edc2e0bbb --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_smart_curtains-1.yaml @@ -0,0 +1,261 @@ +--- +uuid: b1c0bf17-f50b-4675-9928-960a76d4b08c +task_id: home7_dk_cover_curtain-close_the_smart_curtains-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the smart curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the smart curtains + context: + id: 01KHJQZJNMKAJ7RN743F5XPZZ5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:59.732819+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:09:59.734437+00:00 + - role: user + content: Close the smart curtains + attachments: null + created: 2026-02-16 08:09:59.732893+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d68fc10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:59.734451+00:00 + duration_ms: 130.161 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_smart_curtains-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_smart_curtains-2.yaml new file mode 100644 index 000000000..3ab68840d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_smart_curtains-2.yaml @@ -0,0 +1,261 @@ +--- +uuid: 11a9b844-6144-49ad-949b-4103671014ce +task_id: home7_dk_cover_curtain-close_the_smart_curtains-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the smart curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the smart curtains + context: + id: 01KHJQZK20BXY1GMJ2ECY7XMAG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:00.128526+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:00.130206+00:00 + - role: user + content: Close the smart curtains + attachments: null + created: 2026-02-16 08:10:00.128600+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e9dcca0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:00.130220+00:00 + duration_ms: 44.732 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_smart_curtains-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_smart_curtains-3.yaml new file mode 100644 index 000000000..40d37847a --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_smart_curtains-3.yaml @@ -0,0 +1,261 @@ +--- +uuid: 5869c051-cdf2-44ff-8d37-f92d881cdc45 +task_id: home7_dk_cover_curtain-close_the_smart_curtains-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the smart curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the smart curtains + context: + id: 01KHJQZKC8HZEB32YSSNTQQRQV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:00.456926+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:00.459214+00:00 + - role: user + content: Close the smart curtains + attachments: null + created: 2026-02-16 08:10:00.457002+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d6183b0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:00.459241+00:00 + duration_ms: 45.28 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_smart_curtains-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_smart_curtains-4.yaml new file mode 100644 index 000000000..0f8ac633d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-close_the_smart_curtains-4.yaml @@ -0,0 +1,261 @@ +--- +uuid: 1e3fcbc8-a5db-4a96-8cbb-db4c89bf0930 +task_id: home7_dk_cover_curtain-close_the_smart_curtains-4 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Close the smart curtains + expect_changes: + cover.smart_curtain: + state: closed + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: closed + got: + state: open + conversation_trace: + - event_type: async_process + data: + text: Close the smart curtains + context: + id: 01KHJQZKW1YH0542CE6N2E99G2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:00.961566+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:00.963407+00:00 + - role: user + content: Close the smart curtains + attachments: null + created: 2026-02-16 08:10:00.961650+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c89a770>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:00.963420+00:00 + duration_ms: 42.879 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_curtains-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_curtains-0.yaml new file mode 100644 index 000000000..d3bbee2df --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_curtains-0.yaml @@ -0,0 +1,261 @@ +--- +uuid: d0dd2fd0-9382-4802-b588-4ab3709b7582 +task_id: home7_dk_cover_curtain-open_the_living_room_curtains-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room curtains + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room curtains + context: + id: 01KHJQZGNXKRH9CS4WDEATH624 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:57.693536+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:09:57.697046+00:00 + - role: user + content: Open the living room curtains + attachments: null + created: 2026-02-16 08:09:57.693612+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17da7dd20>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:57.697062+00:00 + duration_ms: 49.957 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_curtains-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_curtains-1.yaml new file mode 100644 index 000000000..a9f1aee27 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_curtains-1.yaml @@ -0,0 +1,261 @@ +--- +uuid: c4329253-4a89-49de-9f69-96fc8036d988 +task_id: home7_dk_cover_curtain-open_the_living_room_curtains-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room curtains + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room curtains + context: + id: 01KHJQZH0HXZ4MVY5APTDBPWHC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:58.034008+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:09:58.035722+00:00 + - role: user + content: Open the living room curtains + attachments: null + created: 2026-02-16 08:09:58.034084+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e8724b0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:58.035736+00:00 + duration_ms: 149.7 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_curtains-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_curtains-2.yaml new file mode 100644 index 000000000..56e8d88ba --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_curtains-2.yaml @@ -0,0 +1,261 @@ +--- +uuid: 6aa9e9e6-e32e-420b-858a-1340c7e2cf23 +task_id: home7_dk_cover_curtain-open_the_living_room_curtains-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room curtains + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room curtains + context: + id: 01KHJQZHDVQW5WVT10Y6MP313H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:58.459144+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:09:58.460774+00:00 + - role: user + content: Open the living room curtains + attachments: null + created: 2026-02-16 08:09:58.459220+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f1b0a90>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:58.460787+00:00 + duration_ms: 44.434 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_curtains-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_curtains-3.yaml new file mode 100644 index 000000000..d57723fbc --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_curtains-3.yaml @@ -0,0 +1,261 @@ +--- +uuid: 169ba851-ac0b-4226-8e1f-ae00377b3bc1 +task_id: home7_dk_cover_curtain-open_the_living_room_curtains-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room curtains + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room curtains + context: + id: 01KHJQZHPQHWRPN5CW86WJY7X7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:58.743823+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:09:58.745724+00:00 + - role: user + content: Open the living room curtains + attachments: null + created: 2026-02-16 08:09:58.743896+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e3b1220>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:58.745736+00:00 + duration_ms: 42.322 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_curtains-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_curtains-4.yaml new file mode 100644 index 000000000..86c1d0953 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_curtains-4.yaml @@ -0,0 +1,261 @@ +--- +uuid: acd0f5cb-0126-4e24-a91f-9d67f9a353c2 +task_id: home7_dk_cover_curtain-open_the_living_room_curtains-4 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room curtains + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room curtains + context: + id: 01KHJQZHZMJNNEFD89S0WE0Z8M + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:59.028165+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:09:59.030424+00:00 + - role: user + content: Open the living room curtains + attachments: null + created: 2026-02-16 08:09:59.028267+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e5b96f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:59.030439+00:00 + duration_ms: 44.183 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_smart_curtain-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_smart_curtain-0.yaml new file mode 100644 index 000000000..10c0d94b4 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_smart_curtain-0.yaml @@ -0,0 +1,261 @@ +--- +uuid: b1e1160c-a9d9-42a9-9540-3a82701a77d7 +task_id: home7_dk_cover_curtain-open_the_living_room_smart_curtain-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room smart curtain + context: + id: 01KHJQZES8N1T30XE95MJXARWN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:55.752141+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:09:55.754576+00:00 + - role: user + content: Open the living room smart curtain + attachments: null + created: 2026-02-16 08:09:55.752215+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d5f6560>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:55.754589+00:00 + duration_ms: 45.184 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_smart_curtain-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_smart_curtain-1.yaml new file mode 100644 index 000000000..0c5482734 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_smart_curtain-1.yaml @@ -0,0 +1,261 @@ +--- +uuid: 23a58676-0984-4331-ba0b-b4d3405d3c9a +task_id: home7_dk_cover_curtain-open_the_living_room_smart_curtain-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room smart curtain + context: + id: 01KHJQZF2WXYVZQ0F6D51HXXP6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:56.060835+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + created: 2026-02-16 08:09:56.064417+00:00 + - role: user + content: Open the living room smart curtain + attachments: null + created: 2026-02-16 08:09:56.060910+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17daa78a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:56.064433+00:00 + duration_ms: 44.536 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_smart_curtain-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_smart_curtain-2.yaml new file mode 100644 index 000000000..55e5f61bf --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_smart_curtain-2.yaml @@ -0,0 +1,261 @@ +--- +uuid: 47911ec6-c6d3-4ac5-80c7-24d3c0f681cd +task_id: home7_dk_cover_curtain-open_the_living_room_smart_curtain-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room smart curtain + context: + id: 01KHJQZFC8CHSSD6ZM46FTQDKC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:56.360830+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:09:56.366055+00:00 + - role: user + content: Open the living room smart curtain + attachments: null + created: 2026-02-16 08:09:56.360904+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16dd807d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:56.366071+00:00 + duration_ms: 136.706 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_smart_curtain-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_smart_curtain-3.yaml new file mode 100644 index 000000000..3860eedd9 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_smart_curtain-3.yaml @@ -0,0 +1,261 @@ +--- +uuid: 0849118b-9d4e-4e18-b419-26ddd8306698 +task_id: home7_dk_cover_curtain-open_the_living_room_smart_curtain-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room smart curtain + context: + id: 01KHJQZG0P57S9YN5DDXBEN25B + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:57.014946+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:09:57.017176+00:00 + - role: user + content: Open the living room smart curtain + attachments: null + created: 2026-02-16 08:09:57.015021+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fd1f530>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:57.017189+00:00 + duration_ms: 123.694 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_smart_curtain-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_smart_curtain-4.yaml new file mode 100644 index 000000000..bf64e5e7f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_living_room_smart_curtain-4.yaml @@ -0,0 +1,261 @@ +--- +uuid: 5b452537-7265-451a-8cb9-bcf17517c834 +task_id: home7_dk_cover_curtain-open_the_living_room_smart_curtain-4 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the living room smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the living room smart curtain + context: + id: 01KHJQZGCQX9MCB3BCB34KVRBN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:57.399529+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:09:57.402911+00:00 + - role: user + content: Open the living room smart curtain + attachments: null + created: 2026-02-16 08:09:57.399604+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ffda610>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:57.402926+00:00 + duration_ms: 41.947 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_smart_curtain-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_smart_curtain-0.yaml new file mode 100644 index 000000000..6be571fb8 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_smart_curtain-0.yaml @@ -0,0 +1,261 @@ +--- +uuid: d04dacdb-84e6-4c49-b8a6-a8164f8542de +task_id: home7_dk_cover_curtain-open_the_smart_curtain-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the smart curtain + context: + id: 01KHJQZB21PK6CCW3TQ7C0SBDC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:51.937707+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:09:51.939276+00:00 + - role: user + content: Open the smart curtain + attachments: null + created: 2026-02-16 08:09:51.937782+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ee60880>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:51.939289+00:00 + duration_ms: 42.825 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_smart_curtain-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_smart_curtain-1.yaml new file mode 100644 index 000000000..62236e5c2 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_smart_curtain-1.yaml @@ -0,0 +1,261 @@ +--- +uuid: ab214cde-ecf0-42a5-a242-7df1a1ce7fcc +task_id: home7_dk_cover_curtain-open_the_smart_curtain-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the smart curtain + context: + id: 01KHJQZDEF01VK8GAZX6P0B0E8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:54.383288+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:09:54.384832+00:00 + - role: user + content: Open the smart curtain + attachments: null + created: 2026-02-16 08:09:54.383360+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e5b10c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:54.384843+00:00 + duration_ms: 221.175 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_smart_curtain-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_smart_curtain-2.yaml new file mode 100644 index 000000000..10f10ce87 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_smart_curtain-2.yaml @@ -0,0 +1,261 @@ +--- +uuid: f6601428-defd-426f-9521-ef17f32f26bf +task_id: home7_dk_cover_curtain-open_the_smart_curtain-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the smart curtain + context: + id: 01KHJQZDX2W5JG5F7S4RNEZRDG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:54.850978+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:09:54.856423+00:00 + - role: user + content: Open the smart curtain + attachments: null + created: 2026-02-16 08:09:54.851055+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ef9bd70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:54.856438+00:00 + duration_ms: 45.439 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_smart_curtain-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_smart_curtain-3.yaml new file mode 100644 index 000000000..9bfa95733 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_smart_curtain-3.yaml @@ -0,0 +1,261 @@ +--- +uuid: 8a27b5a7-9730-4ee9-a0c3-c6316846d8b7 +task_id: home7_dk_cover_curtain-open_the_smart_curtain-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the smart curtain + context: + id: 01KHJQZE63358SAXD4BQFW4SKM + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:55.139395+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:09:55.141334+00:00 + - role: user + content: Open the smart curtain + attachments: null + created: 2026-02-16 08:09:55.139467+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e588250>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:55.141347+00:00 + duration_ms: 41.93 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_smart_curtain-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_smart_curtain-4.yaml new file mode 100644 index 000000000..06a208bc5 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-open_the_smart_curtain-4.yaml @@ -0,0 +1,261 @@ +--- +uuid: fba4c13a-42ed-4b5f-902a-ad162bde8596 +task_id: home7_dk_cover_curtain-open_the_smart_curtain-4 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Open the smart curtain + expect_changes: + cover.smart_curtain: + state: open + attributes: null +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Open the smart curtain + context: + id: 01KHJQZEFFTC6CVAB04P8ZETW1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:55.439194+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:09:55.441267+00:00 + - role: user + content: Open the smart curtain + attachments: null + created: 2026-02-16 08:09:55.439313+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f106090>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:55.441280+00:00 + duration_ms: 41.064 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_living_room_curtains_to_50-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_living_room_curtains_to_50-0.yaml new file mode 100644 index 000000000..00e282402 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_living_room_curtains_to_50-0.yaml @@ -0,0 +1,262 @@ +--- +uuid: 1d61176f-a7f5-4e2e-9c86-ea31d65f8085 +task_id: home7_dk_cover_curtain-set_the_living_room_curtains_to_50-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the living room curtains to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the living room curtains to 50% + context: + id: 01KHJQZSEM61YDWM7HXM38190S + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:06.676986+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:06.679007+00:00 + - role: user + content: Set the living room curtains to 50% + attachments: null + created: 2026-02-16 08:10:06.677058+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e0cf530>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:06.679020+00:00 + duration_ms: 130.855 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_living_room_curtains_to_50-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_living_room_curtains_to_50-1.yaml new file mode 100644 index 000000000..78c1b5d55 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_living_room_curtains_to_50-1.yaml @@ -0,0 +1,262 @@ +--- +uuid: 35640599-705d-4302-84c1-fa9eb6257220 +task_id: home7_dk_cover_curtain-set_the_living_room_curtains_to_50-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the living room curtains to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the living room curtains to 50% + context: + id: 01KHJQZSTV7G32MVQCE9DFNFNC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:07.067943+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:07.069647+00:00 + - role: user + content: Set the living room curtains to 50% + attachments: null + created: 2026-02-16 08:10:07.068019+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f15d2d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:07.069659+00:00 + duration_ms: 45.599 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_living_room_curtains_to_50-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_living_room_curtains_to_50-2.yaml new file mode 100644 index 000000000..053b101a6 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_living_room_curtains_to_50-2.yaml @@ -0,0 +1,262 @@ +--- +uuid: b926ea4c-8516-49ab-a845-83ee0e2f80cc +task_id: home7_dk_cover_curtain-set_the_living_room_curtains_to_50-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the living room curtains to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the living room curtains to 50% + context: + id: 01KHJQZT8YQR6PEEWDFVPMEZNB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:07.518428+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:07.520487+00:00 + - role: user + content: Set the living room curtains to 50% + attachments: null + created: 2026-02-16 08:10:07.518503+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e5443b0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:07.520499+00:00 + duration_ms: 48.114 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_living_room_curtains_to_50-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_living_room_curtains_to_50-3.yaml new file mode 100644 index 000000000..e66464058 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_living_room_curtains_to_50-3.yaml @@ -0,0 +1,262 @@ +--- +uuid: a60a6071-5816-42d9-bb2d-86d38acd6826 +task_id: home7_dk_cover_curtain-set_the_living_room_curtains_to_50-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the living room curtains to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the living room curtains to 50% + context: + id: 01KHJQZTN6WZRWHPQGGDJ2A1H1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:07.910636+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:07.913015+00:00 + - role: user + content: Set the living room curtains to 50% + attachments: null + created: 2026-02-16 08:10:07.910710+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f4f83b0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:07.913028+00:00 + duration_ms: 128.369 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_living_room_curtains_to_50-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_living_room_curtains_to_50-4.yaml new file mode 100644 index 000000000..9327b0bb4 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_living_room_curtains_to_50-4.yaml @@ -0,0 +1,262 @@ +--- +uuid: 63f2e9bf-f626-4d55-b467-79f0da52759f +task_id: home7_dk_cover_curtain-set_the_living_room_curtains_to_50-4 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the living room curtains to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the living room curtains to 50% + context: + id: 01KHJQZV0SV04TRZMQ6W7H9G5J + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:08.281832+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:08.283426+00:00 + - role: user + content: Set the living room curtains to 50% + attachments: null + created: 2026-02-16 08:10:08.281906+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f277b60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:08.283438+00:00 + duration_ms: 123.133 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_smart_curtain_to_50-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_smart_curtain_to_50-0.yaml new file mode 100644 index 000000000..aa9b1a269 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_smart_curtain_to_50-0.yaml @@ -0,0 +1,262 @@ +--- +uuid: 64731dce-ee9b-4b6a-9cc5-49a2533d271a +task_id: home7_dk_cover_curtain-set_the_smart_curtain_to_50-0 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the smart curtain to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the smart curtain to 50% + context: + id: 01KHJQZQD61EXHW8SHKQBNFE0F + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:04.582775+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:04.584504+00:00 + - role: user + content: Set the smart curtain to 50% + attachments: null + created: 2026-02-16 08:10:04.582848+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fd18300>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:04.584518+00:00 + duration_ms: 130.346 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_smart_curtain_to_50-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_smart_curtain_to_50-1.yaml new file mode 100644 index 000000000..0cd968084 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_smart_curtain_to_50-1.yaml @@ -0,0 +1,262 @@ +--- +uuid: cefdb077-dc28-471b-81b9-189fabd5ac63 +task_id: home7_dk_cover_curtain-set_the_smart_curtain_to_50-1 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the smart curtain to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the smart curtain to 50% + context: + id: 01KHJQZQSVAHSYYXY1GEJ2191T + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:04.987526+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:04.989618+00:00 + - role: user + content: Set the smart curtain to 50% + attachments: null + created: 2026-02-16 08:10:04.987598+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16faf9f30>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:04.989631+00:00 + duration_ms: 42.058 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_smart_curtain_to_50-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_smart_curtain_to_50-2.yaml new file mode 100644 index 000000000..9f645c517 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_smart_curtain_to_50-2.yaml @@ -0,0 +1,262 @@ +--- +uuid: f1b239eb-cb02-4305-b860-8d6a719390f0 +task_id: home7_dk_cover_curtain-set_the_smart_curtain_to_50-2 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the smart curtain to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the smart curtain to 50% + context: + id: 01KHJQZR2G4E248VPT4G17CJHG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:05.264446+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:05.267355+00:00 + - role: user + content: Set the smart curtain to 50% + attachments: null + created: 2026-02-16 08:10:05.264519+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f966ae0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:05.267368+00:00 + duration_ms: 129.163 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_smart_curtain_to_50-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_smart_curtain_to_50-3.yaml new file mode 100644 index 000000000..e57166b89 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_smart_curtain_to_50-3.yaml @@ -0,0 +1,262 @@ +--- +uuid: b8a4e3d0-c799-4f1a-b824-c73b70bce3a2 +task_id: home7_dk_cover_curtain-set_the_smart_curtain_to_50-3 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the smart curtain to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the smart curtain to 50% + context: + id: 01KHJQZRTAH1YFFPN12S1FC40E + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:06.027022+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:06.028695+00:00 + - role: user + content: Set the smart curtain to 50% + attachments: null + created: 2026-02-16 08:10:06.027098+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ef9bd70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:06.028709+00:00 + duration_ms: 46.865 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_smart_curtain_to_50-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_smart_curtain_to_50-4.yaml new file mode 100644 index 000000000..b4d6eeb80 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_cover_curtain-set_the_smart_curtain_to_50-4.yaml @@ -0,0 +1,262 @@ +--- +uuid: 9bb0f0c0-63ea-4a9f-ae2e-26c44e282080 +task_id: home7_dk_cover_curtain-set_the_smart_curtain_to_50-4 +model_id: gemma-3-27b-it +category: cover +task: + input_text: Set the smart curtain to 50% + expect_changes: + cover.smart_curtain: + state: open + attributes: + current_position: 100 +response: Error talking to API +context: + unexpected_states: + cover.smart_curtain: + expected: + state: open + got: + state: closed + conversation_trace: + - event_type: async_process + data: + text: Set the smart curtain to 50% + context: + id: 01KHJQZS2WCNA77R9AW48Z79J5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:06.300995+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:06.302615+00:00 + - role: user + content: Set the smart curtain to 50% + attachments: null + created: 2026-02-16 08:10:06.301072+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16eea6e50>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:06.302628+00:00 + duration_ms: 133.958 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_music-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_music-0.yaml new file mode 100644 index 000000000..857f85213 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_music-0.yaml @@ -0,0 +1,262 @@ +--- +uuid: f5e64554-c733-4855-b46b-ba774263991d +task_id: home7_dk_media_player-mute_the_music-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Mute the music + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.0 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.0 + got: + volume_level: 0.6 + conversation_trace: + - event_type: async_process + data: + text: Mute the music + context: + id: 01KHJR0KSYEMR5M13QTARSCRTG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:33.662631+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:33.665843+00:00 + - role: user + content: Mute the music + attachments: null + created: 2026-02-16 08:10:33.662706+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ea29850>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:33.665856+00:00 + duration_ms: 138.545 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_music-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_music-1.yaml new file mode 100644 index 000000000..3341aa4bd --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_music-1.yaml @@ -0,0 +1,262 @@ +--- +uuid: 039e5783-e6ce-4966-b43b-4e1fedcf50b2 +task_id: home7_dk_media_player-mute_the_music-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Mute the music + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.0 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.0 + got: + volume_level: 0.6 + conversation_trace: + - event_type: async_process + data: + text: Mute the music + context: + id: 01KHJR0M5WYCJ4S38WC4E0RSW5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:34.045084+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:34.048283+00:00 + - role: user + content: Mute the music + attachments: null + created: 2026-02-16 08:10:34.045155+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d0f1430>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:34.048297+00:00 + duration_ms: 137.804 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_music-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_music-2.yaml new file mode 100644 index 000000000..e49936c43 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_music-2.yaml @@ -0,0 +1,262 @@ +--- +uuid: b1dd0bd4-71e2-4ec7-94e3-f2a6c85975ea +task_id: home7_dk_media_player-mute_the_music-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Mute the music + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.0 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.0 + got: + volume_level: 0.6 + conversation_trace: + - event_type: async_process + data: + text: Mute the music + context: + id: 01KHJR0MJFVDAAYMM7168R7QKW + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:34.447214+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:34.450378+00:00 + - role: user + content: Mute the music + attachments: null + created: 2026-02-16 08:10:34.447312+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e0da6c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:34.450390+00:00 + duration_ms: 152.981 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_music-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_music-3.yaml new file mode 100644 index 000000000..c79273344 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_music-3.yaml @@ -0,0 +1,262 @@ +--- +uuid: aacf5a98-925a-4ffe-99e1-3d663b2233c5 +task_id: home7_dk_media_player-mute_the_music-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Mute the music + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.0 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.0 + got: + volume_level: 0.6 + conversation_trace: + - event_type: async_process + data: + text: Mute the music + context: + id: 01KHJR0MYBWR6G9P71Z23P6TMS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:34.827959+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:34.830954+00:00 + - role: user + content: Mute the music + attachments: null + created: 2026-02-16 08:10:34.828031+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ff22090>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:34.830968+00:00 + duration_ms: 43.56 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_music-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_music-4.yaml new file mode 100644 index 000000000..3ecf531d5 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_music-4.yaml @@ -0,0 +1,262 @@ +--- +uuid: e54ac56a-d4bf-4da2-b711-90de005e9332 +task_id: home7_dk_media_player-mute_the_music-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Mute the music + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.0 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.0 + got: + volume_level: 0.6 + conversation_trace: + - event_type: async_process + data: + text: Mute the music + context: + id: 01KHJR0N8QCXZHYE1GSB1BD0NJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:35.159469+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:35.161080+00:00 + - role: user + content: Mute the music + attachments: null + created: 2026-02-16 08:10:35.159541+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16d8614e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:35.161093+00:00 + duration_ms: 50.352 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_outdoor_speakers-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_outdoor_speakers-0.yaml new file mode 100644 index 000000000..f7dcafa7d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_outdoor_speakers-0.yaml @@ -0,0 +1,262 @@ +--- +uuid: 4c86cedf-ba3d-475b-a448-90cfd1cf309e +task_id: home7_dk_media_player-mute_the_outdoor_speakers-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Mute the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.0 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.0 + got: + volume_level: 0.6 + conversation_trace: + - event_type: async_process + data: + text: Mute the outdoor speakers + context: + id: 01KHJR0J9BRTD2G4QB7S00T0Y4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:32.108088+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:32.109614+00:00 + - role: user + content: Mute the outdoor speakers + attachments: null + created: 2026-02-16 08:10:32.108163+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16d43eda0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:32.109626+00:00 + duration_ms: 40.87 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_outdoor_speakers-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_outdoor_speakers-1.yaml new file mode 100644 index 000000000..be65a2ffa --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_outdoor_speakers-1.yaml @@ -0,0 +1,262 @@ +--- +uuid: 0c6ad6c5-a049-4bfa-9c48-0f64084205ea +task_id: home7_dk_media_player-mute_the_outdoor_speakers-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Mute the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.0 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.0 + got: + volume_level: 0.6 + conversation_trace: + - event_type: async_process + data: + text: Mute the outdoor speakers + context: + id: 01KHJR0JJAQ57GEA5KMZ56GY5D + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:32.394713+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:32.399102+00:00 + - role: user + content: Mute the outdoor speakers + attachments: null + created: 2026-02-16 08:10:32.394788+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16df0d170>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:32.399115+00:00 + duration_ms: 45.297 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_outdoor_speakers-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_outdoor_speakers-2.yaml new file mode 100644 index 000000000..81bb78fb2 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_outdoor_speakers-2.yaml @@ -0,0 +1,262 @@ +--- +uuid: f208f790-8a78-4c47-b194-ca592b10e32e +task_id: home7_dk_media_player-mute_the_outdoor_speakers-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Mute the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.0 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.0 + got: + volume_level: 0.6 + conversation_trace: + - event_type: async_process + data: + text: Mute the outdoor speakers + context: + id: 01KHJR0JVDFEYVHFV8B9469YNG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:32.686074+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:32.687607+00:00 + - role: user + content: Mute the outdoor speakers + attachments: null + created: 2026-02-16 08:10:32.686146+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e58cb40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:32.687619+00:00 + duration_ms: 44.776 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_outdoor_speakers-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_outdoor_speakers-3.yaml new file mode 100644 index 000000000..0a2e68d39 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_outdoor_speakers-3.yaml @@ -0,0 +1,262 @@ +--- +uuid: 607043a4-1995-41b9-8135-ff769c75ba41 +task_id: home7_dk_media_player-mute_the_outdoor_speakers-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Mute the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.0 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.0 + got: + volume_level: 0.6 + conversation_trace: + - event_type: async_process + data: + text: Mute the outdoor speakers + context: + id: 01KHJR0K4DQT2X5P288Q7AB75P + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:32.973768+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:32.976961+00:00 + - role: user + content: Mute the outdoor speakers + attachments: null + created: 2026-02-16 08:10:32.973841+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16dc73690>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:32.976974+00:00 + duration_ms: 43.361 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_outdoor_speakers-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_outdoor_speakers-4.yaml new file mode 100644 index 000000000..13fab00b3 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-mute_the_outdoor_speakers-4.yaml @@ -0,0 +1,262 @@ +--- +uuid: 72fbfa3e-ed7b-419b-9e97-fd36d0f2b667 +task_id: home7_dk_media_player-mute_the_outdoor_speakers-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Mute the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.0 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.0 + got: + volume_level: 0.6 + conversation_trace: + - event_type: async_process + data: + text: Mute the outdoor speakers + context: + id: 01KHJR0KDE2795K2DFWKKTVNWH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:33.262284+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:33.266390+00:00 + - role: user + content: Mute the outdoor speakers + attachments: null + created: 2026-02-16 08:10:33.262358+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fbe81a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:33.266404+00:00 + duration_ms: 136.56 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-next_song-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-next_song-0.yaml new file mode 100644 index 000000000..315ff6958 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-next_song-0.yaml @@ -0,0 +1,262 @@ +--- +uuid: 4753e5b0-194e-4a3a-8759-f729eea51de5 +task_id: home7_dk_media_player-next_song-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Next song + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Next song + context: + id: 01KHJR0C2GAPC0W4S1HZD2GMJW + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:25.744747+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:25.747219+00:00 + - role: user + content: Next song + attachments: null + created: 2026-02-16 08:10:25.744824+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d9d4720>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:25.747246+00:00 + duration_ms: 41.456 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-next_song-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-next_song-1.yaml new file mode 100644 index 000000000..aa91324c4 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-next_song-1.yaml @@ -0,0 +1,262 @@ +--- +uuid: 80436064-748b-44a0-b3ea-9af649858990 +task_id: home7_dk_media_player-next_song-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Next song + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Next song + context: + id: 01KHJR0CMBZF06RA3SEMQ24J8H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:26.316037+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:26.319136+00:00 + - role: user + content: Next song + attachments: null + created: 2026-02-16 08:10:26.316111+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16cbcbed0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:26.319150+00:00 + duration_ms: 132.401 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-next_song-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-next_song-2.yaml new file mode 100644 index 000000000..352c605d6 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-next_song-2.yaml @@ -0,0 +1,262 @@ +--- +uuid: efdc5974-8fee-4a45-9032-62c7c8fdf451 +task_id: home7_dk_media_player-next_song-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Next song + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Next song + context: + id: 01KHJR0D1ZW48TK01PZT2RTRGE + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:26.751331+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:26.752968+00:00 + - role: user + content: Next song + attachments: null + created: 2026-02-16 08:10:26.751404+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c9e2c40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:26.752980+00:00 + duration_ms: 53.589 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-next_song-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-next_song-3.yaml new file mode 100644 index 000000000..80fd82ed5 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-next_song-3.yaml @@ -0,0 +1,262 @@ +--- +uuid: 899fca31-fb7c-4863-abef-d40f0baab73b +task_id: home7_dk_media_player-next_song-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Next song + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Next song + context: + id: 01KHJR0DBWXNEM36H30PCTVCXD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:27.068371+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:27.070011+00:00 + - role: user + content: Next song + attachments: null + created: 2026-02-16 08:10:27.068447+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d1bd6f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:27.070023+00:00 + duration_ms: 49.299 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-next_song-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-next_song-4.yaml new file mode 100644 index 000000000..266bf5612 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-next_song-4.yaml @@ -0,0 +1,262 @@ +--- +uuid: 158c13a5-0602-418a-a912-736ea1bd5512 +task_id: home7_dk_media_player-next_song-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Next song + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Next song + context: + id: 01KHJR0DMT6WD22RT65GCHFF3Z + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:27.354951+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:27.356555+00:00 + - role: user + content: Next song + attachments: null + created: 2026-02-16 08:10:27.355028+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d0cbc10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:27.356568+00:00 + duration_ms: 41.404 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_outdoor_speakers-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_outdoor_speakers-0.yaml new file mode 100644 index 000000000..3239fa173 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_outdoor_speakers-0.yaml @@ -0,0 +1,261 @@ +--- +uuid: de8835a9-ef41-42dc-89fd-e4a3b34d18bf +task_id: home7_dk_media_player-pause_outdoor_speakers-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause Outdoor Speakers + context: + id: 01KHJQZVCHK986FY1FRT35SM1Q + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:08.657348+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:08.658937+00:00 + - role: user + content: Pause Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:08.657423+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d82fcc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:08.658949+00:00 + duration_ms: 40.601 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_outdoor_speakers-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_outdoor_speakers-1.yaml new file mode 100644 index 000000000..00f429539 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_outdoor_speakers-1.yaml @@ -0,0 +1,261 @@ +--- +uuid: 244bd8ae-4f37-45d2-9454-64e618a58d87 +task_id: home7_dk_media_player-pause_outdoor_speakers-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause Outdoor Speakers + context: + id: 01KHJQZVNSAJSP43988BQB6EP6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:08.953335+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:08.954990+00:00 + - role: user + content: Pause Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:08.953409+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17da7a2a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:08.955004+00:00 + duration_ms: 40.747 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_outdoor_speakers-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_outdoor_speakers-2.yaml new file mode 100644 index 000000000..9ceaea3f8 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_outdoor_speakers-2.yaml @@ -0,0 +1,261 @@ +--- +uuid: c7361476-c47f-44fa-9c39-7836ffd7d7cc +task_id: home7_dk_media_player-pause_outdoor_speakers-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause Outdoor Speakers + context: + id: 01KHJQZVZ2R4GYYB7YD4JV4S1B + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:09.250946+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:09.252913+00:00 + - role: user + content: Pause Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:09.251020+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17de5cca0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:09.252928+00:00 + duration_ms: 41.129 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_outdoor_speakers-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_outdoor_speakers-3.yaml new file mode 100644 index 000000000..22992f311 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_outdoor_speakers-3.yaml @@ -0,0 +1,261 @@ +--- +uuid: 8bc14a60-f5a8-4b88-b53a-c787bb8e5143 +task_id: home7_dk_media_player-pause_outdoor_speakers-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause Outdoor Speakers + context: + id: 01KHJQZW8ES1Z666CX7DTPXV28 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:09.550887+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:09.554410+00:00 + - role: user + content: Pause Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:09.550973+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e98efb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:09.554424+00:00 + duration_ms: 45.13 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_outdoor_speakers-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_outdoor_speakers-4.yaml new file mode 100644 index 000000000..2d4feae04 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_outdoor_speakers-4.yaml @@ -0,0 +1,261 @@ +--- +uuid: 1ade0667-de87-41d9-a9ea-79fbb7f42b21 +task_id: home7_dk_media_player-pause_outdoor_speakers-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause Outdoor Speakers + context: + id: 01KHJQZWJW29E84XA0S9FQ9XB7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:09.884400+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:09.886034+00:00 + - role: user + content: Pause Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:09.884473+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e525d20>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:09.886046+00:00 + duration_ms: 141.044 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music-0.yaml new file mode 100644 index 000000000..1007a8687 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music-0.yaml @@ -0,0 +1,261 @@ +--- +uuid: 4ddf1078-9409-40b2-9ba1-1066e572b6a7 +task_id: home7_dk_media_player-pause_the_music-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the music + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the music + context: + id: 01KHJQZWZAFNX8C6286YP6GDXD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:10.282358+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:10.284005+00:00 + - role: user + content: Pause the music + attachments: null + created: 2026-02-16 08:10:10.282436+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fca0a90>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:10.284018+00:00 + duration_ms: 44.178 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music-1.yaml new file mode 100644 index 000000000..363bde8e9 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music-1.yaml @@ -0,0 +1,261 @@ +--- +uuid: 1b910134-1a55-446e-8d4e-4717dc020555 +task_id: home7_dk_media_player-pause_the_music-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the music + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the music + context: + id: 01KHJQZX7TKX76KVJJ2895ZXVF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:10.554968+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:10.556577+00:00 + - role: user + content: Pause the music + attachments: null + created: 2026-02-16 08:10:10.555040+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16dd64510>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:10.556590+00:00 + duration_ms: 40.835 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music-2.yaml new file mode 100644 index 000000000..3a2cd636c --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music-2.yaml @@ -0,0 +1,261 @@ +--- +uuid: b3d72a01-fff5-46ce-b3fc-f57b68206310 +task_id: home7_dk_media_player-pause_the_music-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the music + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the music + context: + id: 01KHJQZXH7S9DR2YXSSKREXKWX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:10.855166+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:10.857054+00:00 + - role: user + content: Pause the music + attachments: null + created: 2026-02-16 08:10:10.855265+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d724b40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:10.857067+00:00 + duration_ms: 122.656 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music-3.yaml new file mode 100644 index 000000000..d57d14cfc --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music-3.yaml @@ -0,0 +1,261 @@ +--- +uuid: 9eddba86-e4f6-4bbd-8819-82334f938f5c +task_id: home7_dk_media_player-pause_the_music-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the music + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the music + context: + id: 01KHJQZXWR8VTC1WYETQR9MRVQ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:11.224396+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:11.228609+00:00 + - role: user + content: Pause the music + attachments: null + created: 2026-02-16 08:10:11.224469+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ecf0b40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:11.228622+00:00 + duration_ms: 43.091 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music-4.yaml new file mode 100644 index 000000000..76602909a --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music-4.yaml @@ -0,0 +1,261 @@ +--- +uuid: c0f48e6a-e28a-42cb-921d-b8c3f383f638 +task_id: home7_dk_media_player-pause_the_music-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the music + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the music + context: + id: 01KHJQZY62HSSV7Q1J76X6P435 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:11.522871+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:11.529407+00:00 + - role: user + content: Pause the music + attachments: null + created: 2026-02-16 08:10:11.522943+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ef73ed0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:11.529424+00:00 + duration_ms: 141.077 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music_outside-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music_outside-0.yaml new file mode 100644 index 000000000..64acdcf00 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music_outside-0.yaml @@ -0,0 +1,261 @@ +--- +uuid: 6dd2ad55-d5f7-46ec-a62b-50ac6b3311fb +task_id: home7_dk_media_player-pause_the_music_outside-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the music outside + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the music outside + context: + id: 01KHJQZYJCZQZC67EA16Y2NWNP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:11.916746+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:11.920192+00:00 + - role: user + content: Pause the music outside + attachments: null + created: 2026-02-16 08:10:11.916819+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16de0e6c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:11.920207+00:00 + duration_ms: 140.968 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music_outside-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music_outside-1.yaml new file mode 100644 index 000000000..488c0b8b1 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music_outside-1.yaml @@ -0,0 +1,261 @@ +--- +uuid: 16616ed7-dcdc-441c-b57d-66dca431e285 +task_id: home7_dk_media_player-pause_the_music_outside-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the music outside + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the music outside + context: + id: 01KHJQZYYWV5JKD21EW69SF460 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:12.316263+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:12.319456+00:00 + - role: user + content: Pause the music outside + attachments: null + created: 2026-02-16 08:10:12.316338+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16eb32610>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:12.319470+00:00 + duration_ms: 43.952 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music_outside-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music_outside-2.yaml new file mode 100644 index 000000000..8bc313899 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music_outside-2.yaml @@ -0,0 +1,261 @@ +--- +uuid: 19cc28b6-8b51-45a0-8f29-582f43253e9b +task_id: home7_dk_media_player-pause_the_music_outside-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the music outside + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the music outside + context: + id: 01KHJQZZ80HZW2BFG8YHXW18AK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:12.608846+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:12.612233+00:00 + - role: user + content: Pause the music outside + attachments: null + created: 2026-02-16 08:10:12.608918+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17df6c5c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:12.612250+00:00 + duration_ms: 41.358 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music_outside-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music_outside-3.yaml new file mode 100644 index 000000000..82e8a86af --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music_outside-3.yaml @@ -0,0 +1,261 @@ +--- +uuid: f5673c57-89c4-4374-87d4-fc2cd3fbb8a9 +task_id: home7_dk_media_player-pause_the_music_outside-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the music outside + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the music outside + context: + id: 01KHJQZZGZBQQQNM2MK9S7DNTH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:12.895636+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:12.898889+00:00 + - role: user + content: Pause the music outside + attachments: null + created: 2026-02-16 08:10:12.895709+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e71b5e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:12.898902+00:00 + duration_ms: 44.618 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music_outside-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music_outside-4.yaml new file mode 100644 index 000000000..32b7128f6 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_music_outside-4.yaml @@ -0,0 +1,261 @@ +--- +uuid: 3afa74f6-c256-4d37-9fee-d70df90aea87 +task_id: home7_dk_media_player-pause_the_music_outside-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the music outside + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the music outside + context: + id: 01KHJQZZTB63J040Q9095E1BFT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:13.195218+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:13.200603+00:00 + - role: user + content: Pause the music outside + attachments: null + created: 2026-02-16 08:10:13.195324+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17de54eb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:13.200617+00:00 + duration_ms: 48.443 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_rooftop_terrace_music-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_rooftop_terrace_music-0.yaml new file mode 100644 index 000000000..6a9846933 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_rooftop_terrace_music-0.yaml @@ -0,0 +1,261 @@ +--- +uuid: 85acc8ca-7864-4a9f-b259-138dd2aab67a +task_id: home7_dk_media_player-pause_the_rooftop_terrace_music-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the Rooftop Terrace music + context: + id: 01KHJR003S8DX9GWWQP7WNV6NQ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:13.497400+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:13.543033+00:00 + - role: user + content: Pause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:10:13.497474+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d7ad640>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:13.543054+00:00 + duration_ms: 83.713 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_rooftop_terrace_music-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_rooftop_terrace_music-1.yaml new file mode 100644 index 000000000..ab568c9a6 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_rooftop_terrace_music-1.yaml @@ -0,0 +1,261 @@ +--- +uuid: ed5ecc36-025a-4fdf-a2ba-1127cdf5368c +task_id: home7_dk_media_player-pause_the_rooftop_terrace_music-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the Rooftop Terrace music + context: + id: 01KHJR00F5CK31CRQ3MNYRJEZ0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:13.861144+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:13.862803+00:00 + - role: user + content: Pause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:10:13.861221+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e1a1e80>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:13.862816+00:00 + duration_ms: 45.348 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_rooftop_terrace_music-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_rooftop_terrace_music-2.yaml new file mode 100644 index 000000000..b335403a9 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_rooftop_terrace_music-2.yaml @@ -0,0 +1,261 @@ +--- +uuid: d7edb8e5-f879-4a83-88b3-ca2c7b619f02 +task_id: home7_dk_media_player-pause_the_rooftop_terrace_music-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the Rooftop Terrace music + context: + id: 01KHJR00QZ2QTQ08HZBSJKYJBT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:14.143253+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:14.144848+00:00 + - role: user + content: Pause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:10:14.143330+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d0247d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:14.144861+00:00 + duration_ms: 45.423 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_rooftop_terrace_music-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_rooftop_terrace_music-3.yaml new file mode 100644 index 000000000..e23a5932b --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_rooftop_terrace_music-3.yaml @@ -0,0 +1,261 @@ +--- +uuid: 949f0d69-2c66-4d15-b522-4b747e997279 +task_id: home7_dk_media_player-pause_the_rooftop_terrace_music-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the Rooftop Terrace music + context: + id: 01KHJR0114SRE3Q0J66B8QZT9E + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:14.436922+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:14.438958+00:00 + - role: user + content: Pause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:10:14.436995+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e31fa00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:14.438971+00:00 + duration_ms: 143.635 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_rooftop_terrace_music-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_rooftop_terrace_music-4.yaml new file mode 100644 index 000000000..d2d7a621d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-pause_the_rooftop_terrace_music-4.yaml @@ -0,0 +1,261 @@ +--- +uuid: a73b19ed-771c-4a5a-abb0-a614d4613769 +task_id: home7_dk_media_player-pause_the_rooftop_terrace_music-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Pause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: paused + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: paused + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Pause the Rooftop Terrace music + context: + id: 01KHJR01DJT7G7QSGJ7CCN10XJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:14.834167+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:14.836524+00:00 + - role: user + content: Pause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:10:14.834260+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17db3d590>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:14.836537+00:00 + duration_ms: 172.718 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_outdoor_speakers-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_outdoor_speakers-0.yaml new file mode 100644 index 000000000..bb5c9b54f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_outdoor_speakers-0.yaml @@ -0,0 +1,261 @@ +--- +uuid: cbf27494-9ba7-424c-ba88-56999b997008 +task_id: home7_dk_media_player-resume_outdoor_speakers-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Resume Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Resume Outdoor Speakers + context: + id: 01KHJR01TWAEBY5646MKEBNEH9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:15.260899+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:15.266786+00:00 + - role: user + content: Resume Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:15.260976+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16efb49e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:15.266801+00:00 + duration_ms: 47.505 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_outdoor_speakers-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_outdoor_speakers-1.yaml new file mode 100644 index 000000000..22074a0d5 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_outdoor_speakers-1.yaml @@ -0,0 +1,261 @@ +--- +uuid: 04604ea1-277c-4b50-9232-1cce28fd5b45 +task_id: home7_dk_media_player-resume_outdoor_speakers-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Resume Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Resume Outdoor Speakers + context: + id: 01KHJR02433QGRZ59F5C8XVMAW + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:15.555952+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:15.559365+00:00 + - role: user + content: Resume Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:15.556023+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d270ca0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:15.559378+00:00 + duration_ms: 43.07 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_outdoor_speakers-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_outdoor_speakers-2.yaml new file mode 100644 index 000000000..fd02bf16f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_outdoor_speakers-2.yaml @@ -0,0 +1,261 @@ +--- +uuid: a09026c7-1225-4fa4-b799-8e905802b633 +task_id: home7_dk_media_player-resume_outdoor_speakers-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Resume Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Resume Outdoor Speakers + context: + id: 01KHJR02D99M899DDVK22ZE44F + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:15.849788+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:15.855518+00:00 + - role: user + content: Resume Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:15.849862+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16dc72560>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:15.855535+00:00 + duration_ms: 154.388 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_outdoor_speakers-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_outdoor_speakers-3.yaml new file mode 100644 index 000000000..fd13126f1 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_outdoor_speakers-3.yaml @@ -0,0 +1,261 @@ +--- +uuid: d9076ca3-e05c-4c3b-b58e-3517d9ffeae5 +task_id: home7_dk_media_player-resume_outdoor_speakers-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Resume Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Resume Outdoor Speakers + context: + id: 01KHJR02SXG6TRYCCQK2PFCGZZ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:16.253183+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:16.256419+00:00 + - role: user + content: Resume Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:16.253280+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dc3d430>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:16.256432+00:00 + duration_ms: 42.317 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_outdoor_speakers-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_outdoor_speakers-4.yaml new file mode 100644 index 000000000..decdc15a7 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_outdoor_speakers-4.yaml @@ -0,0 +1,261 @@ +--- +uuid: 70bfdcc3-d113-4a19-8c34-effa6e0f6210 +task_id: home7_dk_media_player-resume_outdoor_speakers-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Resume Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Resume Outdoor Speakers + context: + id: 01KHJR033T5T213RWN47F45B30 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:16.570612+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:16.576092+00:00 + - role: user + content: Resume Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:16.570687+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c8b35e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:16.576107+00:00 + duration_ms: 52.1 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_playing_the_music-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_playing_the_music-0.yaml new file mode 100644 index 000000000..559676e72 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_playing_the_music-0.yaml @@ -0,0 +1,261 @@ +--- +uuid: 0e455c4c-1e3a-4f3d-a079-eeadfc5e2117 +task_id: home7_dk_media_player-resume_playing_the_music-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Resume playing the music + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Resume playing the music + context: + id: 01KHJR03D3124MK5K0RTWFSWWH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:16.867336+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:16.872289+00:00 + - role: user + content: Resume playing the music + attachments: null + created: 2026-02-16 08:10:16.867409+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d7faa30>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:16.872302+00:00 + duration_ms: 138.883 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_playing_the_music-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_playing_the_music-1.yaml new file mode 100644 index 000000000..c6c4ede27 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_playing_the_music-1.yaml @@ -0,0 +1,261 @@ +--- +uuid: e581022b-80da-4dc2-a93b-d0028e1b1f02 +task_id: home7_dk_media_player-resume_playing_the_music-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Resume playing the music + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Resume playing the music + context: + id: 01KHJR03S446H385A4Z3BDWZBY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:17.253050+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:17.256141+00:00 + - role: user + content: Resume playing the music + attachments: null + created: 2026-02-16 08:10:17.253124+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ef18a90>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:17.256155+00:00 + duration_ms: 42.682 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_playing_the_music-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_playing_the_music-2.yaml new file mode 100644 index 000000000..5ec45439c --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_playing_the_music-2.yaml @@ -0,0 +1,261 @@ +--- +uuid: 1806b065-6400-4c49-b88d-0ab0bad692b8 +task_id: home7_dk_media_player-resume_playing_the_music-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Resume playing the music + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Resume playing the music + context: + id: 01KHJR043ZH2GMY6QY440W3ADR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:17.599802+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:17.601450+00:00 + - role: user + content: Resume playing the music + attachments: null + created: 2026-02-16 08:10:17.599875+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16fe410c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:17.601463+00:00 + duration_ms: 41.967 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_playing_the_music-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_playing_the_music-3.yaml new file mode 100644 index 000000000..3d51933e6 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_playing_the_music-3.yaml @@ -0,0 +1,261 @@ +--- +uuid: 63156ecc-b81d-41f8-9dc7-92bff7aadd97 +task_id: home7_dk_media_player-resume_playing_the_music-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Resume playing the music + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Resume playing the music + context: + id: 01KHJR04CWE1V3NFR1W4HF4ABF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:17.884250+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:17.885806+00:00 + - role: user + content: Resume playing the music + attachments: null + created: 2026-02-16 08:10:17.884324+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16db9cd50>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:17.885818+00:00 + duration_ms: 41.025 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_playing_the_music-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_playing_the_music-4.yaml new file mode 100644 index 000000000..7c90278b5 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-resume_playing_the_music-4.yaml @@ -0,0 +1,261 @@ +--- +uuid: 6c8c001a-781e-4361-8bdb-56f6006fe751 +task_id: home7_dk_media_player-resume_playing_the_music-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Resume playing the music + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Resume playing the music + context: + id: 01KHJR04P8D0BZFNPV0A9SHJ3G + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:18.184833+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:18.186466+00:00 + - role: user + content: Resume playing the music + attachments: null + created: 2026-02-16 08:10:18.184909+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f115170>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:18.186479+00:00 + duration_ms: 41.0 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-set_outdoor_speakers_volume_to_50-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-set_outdoor_speakers_volume_to_50-0.yaml new file mode 100644 index 000000000..a1cc273c6 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-set_outdoor_speakers_volume_to_50-0.yaml @@ -0,0 +1,262 @@ +--- +uuid: f9e6bc0f-0849-410a-a738-eb2959af063c +task_id: home7_dk_media_player-set_outdoor_speakers_volume_to_50-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Set outdoor speakers volume to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Set outdoor speakers volume to 50% + context: + id: 01KHJR0F9Z860AR93ARDV4NJ5W + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:29.055645+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:29.060316+00:00 + - role: user + content: Set outdoor speakers volume to 50% + attachments: null + created: 2026-02-16 08:10:29.055719+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d5eb480>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:29.060331+00:00 + duration_ms: 43.329 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-set_outdoor_speakers_volume_to_50-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-set_outdoor_speakers_volume_to_50-1.yaml new file mode 100644 index 000000000..9d36ab009 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-set_outdoor_speakers_volume_to_50-1.yaml @@ -0,0 +1,262 @@ +--- +uuid: 020c1f3c-8648-4889-adbc-d9249f5c2093 +task_id: home7_dk_media_player-set_outdoor_speakers_volume_to_50-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Set outdoor speakers volume to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Set outdoor speakers volume to 50% + context: + id: 01KHJR0FK77DJ6P75KJTE83ZFN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:29.351864+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:29.353591+00:00 + - role: user + content: Set outdoor speakers volume to 50% + attachments: null + created: 2026-02-16 08:10:29.351936+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17ddc0b40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:29.353604+00:00 + duration_ms: 45.63 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-set_outdoor_speakers_volume_to_50-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-set_outdoor_speakers_volume_to_50-2.yaml new file mode 100644 index 000000000..4b8c46442 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-set_outdoor_speakers_volume_to_50-2.yaml @@ -0,0 +1,262 @@ +--- +uuid: c19c60cf-7ced-4261-8f1d-b0d2cadbb123 +task_id: home7_dk_media_player-set_outdoor_speakers_volume_to_50-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Set outdoor speakers volume to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Set outdoor speakers volume to 50% + context: + id: 01KHJR0FWRRT8QN4JTT0HD1VN5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:29.656876+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:29.658572+00:00 + - role: user + content: Set outdoor speakers volume to 50% + attachments: null + created: 2026-02-16 08:10:29.656945+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17daa7ed0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:29.658585+00:00 + duration_ms: 43.972 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-set_outdoor_speakers_volume_to_50-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-set_outdoor_speakers_volume_to_50-3.yaml new file mode 100644 index 000000000..068116696 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-set_outdoor_speakers_volume_to_50-3.yaml @@ -0,0 +1,262 @@ +--- +uuid: 73aa1d3d-07ed-4174-8971-caadeaed52e6 +task_id: home7_dk_media_player-set_outdoor_speakers_volume_to_50-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Set outdoor speakers volume to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Set outdoor speakers volume to 50% + context: + id: 01KHJR0G5JSAW32028WRSJYQ1R + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:29.938276+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:29.940455+00:00 + - role: user + content: Set outdoor speakers volume to 50% + attachments: null + created: 2026-02-16 08:10:29.938355+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17da32cf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:29.940467+00:00 + duration_ms: 44.474 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-set_outdoor_speakers_volume_to_50-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-set_outdoor_speakers_volume_to_50-4.yaml new file mode 100644 index 000000000..86060641e --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-set_outdoor_speakers_volume_to_50-4.yaml @@ -0,0 +1,262 @@ +--- +uuid: bbc09aae-c7af-492b-aebf-f1073342c1ad +task_id: home7_dk_media_player-set_outdoor_speakers_volume_to_50-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Set outdoor speakers volume to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Set outdoor speakers volume to 50% + context: + id: 01KHJR0GFE7G8TGAV7PWHQ7MZ4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:30.254118+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:30.256176+00:00 + - role: user + content: Set outdoor speakers volume to 50% + attachments: null + created: 2026-02-16 08:10:30.254191+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d3333d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:30.256189+00:00 + duration_ms: 44.815 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_song_on_outdoor_speakers-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_song_on_outdoor_speakers-0.yaml new file mode 100644 index 000000000..ab02389d3 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_song_on_outdoor_speakers-0.yaml @@ -0,0 +1,262 @@ +--- +uuid: 0f49a0eb-a051-496f-8155-14ff4de49cc2 +task_id: home7_dk_media_player-skip_song_on_outdoor_speakers-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip song on outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip song on outdoor speakers + context: + id: 01KHJR08E4J3QSV9XSQ4S39H3J + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:22.020996+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:22.022658+00:00 + - role: user + content: Skip song on outdoor speakers + attachments: null + created: 2026-02-16 08:10:22.021069+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16edb0300>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:22.022672+00:00 + duration_ms: 44.054 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_song_on_outdoor_speakers-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_song_on_outdoor_speakers-1.yaml new file mode 100644 index 000000000..674bada67 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_song_on_outdoor_speakers-1.yaml @@ -0,0 +1,262 @@ +--- +uuid: 12f0b360-e46c-4bf4-a7ae-48599cd017f7 +task_id: home7_dk_media_player-skip_song_on_outdoor_speakers-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip song on outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip song on outdoor speakers + context: + id: 01KHJR08YEMB5PKX9VXK04PYJM + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:22.542192+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:22.543769+00:00 + - role: user + content: Skip song on outdoor speakers + attachments: null + created: 2026-02-16 08:10:22.542290+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16d860460>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:22.543781+00:00 + duration_ms: 40.181 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_song_on_outdoor_speakers-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_song_on_outdoor_speakers-2.yaml new file mode 100644 index 000000000..9aaad9148 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_song_on_outdoor_speakers-2.yaml @@ -0,0 +1,262 @@ +--- +uuid: e043e358-ef12-45a6-b71a-c7b3a293da06 +task_id: home7_dk_media_player-skip_song_on_outdoor_speakers-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip song on outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip song on outdoor speakers + context: + id: 01KHJR097QPCB5QAV3R7JZ7Y16 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:22.839427+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:22.841020+00:00 + - role: user + content: Skip song on outdoor speakers + attachments: null + created: 2026-02-16 08:10:22.839501+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c825850>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:22.841032+00:00 + duration_ms: 129.358 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_song_on_outdoor_speakers-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_song_on_outdoor_speakers-3.yaml new file mode 100644 index 000000000..5bc74193b --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_song_on_outdoor_speakers-3.yaml @@ -0,0 +1,262 @@ +--- +uuid: 96b97562-d6ef-4124-81d1-c45d1b019545 +task_id: home7_dk_media_player-skip_song_on_outdoor_speakers-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip song on outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip song on outdoor speakers + context: + id: 01KHJR09KZ6MBK6E0H5ZBZM2VE + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:23.231420+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:23.236772+00:00 + - role: user + content: Skip song on outdoor speakers + attachments: null + created: 2026-02-16 08:10:23.231491+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e2941a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:23.236787+00:00 + duration_ms: 131.24 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_song_on_outdoor_speakers-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_song_on_outdoor_speakers-4.yaml new file mode 100644 index 000000000..6d98f5409 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_song_on_outdoor_speakers-4.yaml @@ -0,0 +1,262 @@ +--- +uuid: c3c6c3c4-4d6e-4859-8699-f76352d767b2 +task_id: home7_dk_media_player-skip_song_on_outdoor_speakers-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip song on outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip song on outdoor speakers + context: + id: 01KHJR09Z87X3WA48SQETZB5N8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:23.592168+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:23.598559+00:00 + - role: user + content: Skip song on outdoor speakers + attachments: null + created: 2026-02-16 08:10:23.592262+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16dfb22a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:23.598572+00:00 + duration_ms: 138.957 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track-0.yaml new file mode 100644 index 000000000..95ff02213 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track-0.yaml @@ -0,0 +1,262 @@ +--- +uuid: c8262c0b-1d69-42f5-adc7-5d7b37c66f96 +task_id: home7_dk_media_player-skip_to_the_next_track-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip to the next track + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip to the next track + context: + id: 01KHJR0DXMDTHJ78J701R88EYJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:27.636453+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:27.637982+00:00 + - role: user + content: Skip to the next track + attachments: null + created: 2026-02-16 08:10:27.636528+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16d7a7a00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:27.637995+00:00 + duration_ms: 37.75 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track-1.yaml new file mode 100644 index 000000000..cf55d7bd1 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track-1.yaml @@ -0,0 +1,262 @@ +--- +uuid: f44e85d7-f04e-4960-a477-324cc2f11601 +task_id: home7_dk_media_player-skip_to_the_next_track-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip to the next track + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip to the next track + context: + id: 01KHJR0E5YKEA9XGBFKAJ6PRVZ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:27.903092+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:27.907486+00:00 + - role: user + content: Skip to the next track + attachments: null + created: 2026-02-16 08:10:27.903165+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16eeebd70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:27.907501+00:00 + duration_ms: 44.623 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track-2.yaml new file mode 100644 index 000000000..f05ef1bc4 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track-2.yaml @@ -0,0 +1,262 @@ +--- +uuid: eab8ff08-fe37-4fa0-a134-97324e5ee5fd +task_id: home7_dk_media_player-skip_to_the_next_track-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip to the next track + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip to the next track + context: + id: 01KHJR0EEP34SRTA57K286QGWT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:28.182119+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:28.186694+00:00 + - role: user + content: Skip to the next track + attachments: null + created: 2026-02-16 08:10:28.182194+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16d820e00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:28.186709+00:00 + duration_ms: 44.107 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track-3.yaml new file mode 100644 index 000000000..278810523 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track-3.yaml @@ -0,0 +1,262 @@ +--- +uuid: 6fcd4786-82f4-4436-aa96-2d91240abd7c +task_id: home7_dk_media_player-skip_to_the_next_track-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip to the next track + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip to the next track + context: + id: 01KHJR0EQS0ME21YYYEZX266H5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:28.473107+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:28.474772+00:00 + - role: user + content: Skip to the next track + attachments: null + created: 2026-02-16 08:10:28.473180+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16da8bed0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:28.474786+00:00 + duration_ms: 44.679 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track-4.yaml new file mode 100644 index 000000000..88885082e --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track-4.yaml @@ -0,0 +1,262 @@ +--- +uuid: 6b38d2aa-4405-4326-a345-9a6cb4bf12e9 +task_id: home7_dk_media_player-skip_to_the_next_track-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip to the next track + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip to the next track + context: + id: 01KHJR0F0Z283YN7TKX0097AEA + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:28.767975+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:28.770525+00:00 + - role: user + content: Skip to the next track + attachments: null + created: 2026-02-16 08:10:28.768052+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16df81bc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:28.770539+00:00 + duration_ms: 40.42 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-0.yaml new file mode 100644 index 000000000..578e10c03 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-0.yaml @@ -0,0 +1,262 @@ +--- +uuid: 2bb6d3da-b061-4dcc-bbc4-c407a0bddea6 +task_id: home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip to the next track on the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip to the next track on the outdoor speakers + context: + id: 01KHJR0AC660F1GRCV1M9YW7HA + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:24.006988+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:24.008559+00:00 + - role: user + content: Skip to the next track on the outdoor speakers + attachments: null + created: 2026-02-16 08:10:24.007062+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e05cbf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:24.008571+00:00 + duration_ms: 129.476 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-1.yaml new file mode 100644 index 000000000..ca845ea62 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-1.yaml @@ -0,0 +1,262 @@ +--- +uuid: 031956fb-0f3e-411f-9ce7-da25e1d63814 +task_id: home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip to the next track on the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip to the next track on the outdoor speakers + context: + id: 01KHJR0ARB9EAVH3EQKY8QFDQ4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:24.396092+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:24.397761+00:00 + - role: user + content: Skip to the next track on the outdoor speakers + attachments: null + created: 2026-02-16 08:10:24.396165+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17db41170>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:24.397772+00:00 + duration_ms: 144.561 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-2.yaml new file mode 100644 index 000000000..1a2eaa5e4 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-2.yaml @@ -0,0 +1,262 @@ +--- +uuid: 32855e62-dc93-4d83-8393-8db1366e70bb +task_id: home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip to the next track on the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip to the next track on the outdoor speakers + context: + id: 01KHJR0B4BPP5BYRCE40K6VHSN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:24.779714+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:24.782967+00:00 + - role: user + content: Skip to the next track on the outdoor speakers + attachments: null + created: 2026-02-16 08:10:24.779789+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d0b2140>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:24.782981+00:00 + duration_ms: 42.528 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-3.yaml new file mode 100644 index 000000000..4c3299038 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-3.yaml @@ -0,0 +1,262 @@ +--- +uuid: a61d2fc6-4628-4485-b567-5230a214cdd8 +task_id: home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip to the next track on the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip to the next track on the outdoor speakers + context: + id: 01KHJR0BDJJ6QFMM9WK9BASMHC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:25.074649+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:25.077213+00:00 + - role: user + content: Skip to the next track on the outdoor speakers + attachments: null + created: 2026-02-16 08:10:25.074722+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17dac8670>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:25.077240+00:00 + duration_ms: 135.788 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-4.yaml new file mode 100644 index 000000000..619802e99 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-4.yaml @@ -0,0 +1,262 @@ +--- +uuid: f900e533-c9c4-4c89-941b-2d8c161455c6 +task_id: home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Skip to the next track on the outdoor speakers + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + media_track: 2 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + media_track: 2 + got: + media_track: 1 + conversation_trace: + - event_type: async_process + data: + text: Skip to the next track on the outdoor speakers + context: + id: 01KHJR0BSD2K8W3P9S1X85KE0N + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:25.453472+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:25.456042+00:00 + - role: user + content: Skip to the next track on the outdoor speakers + attachments: null + created: 2026-02-16 08:10:25.453547+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d00f950>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:25.456056+00:00 + duration_ms: 41.279 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_music-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_music-0.yaml new file mode 100644 index 000000000..1e841c86b --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_music-0.yaml @@ -0,0 +1,261 @@ +--- +uuid: d8d436d4-ad61-4f57-8252-b3c48bfa32c2 +task_id: home7_dk_media_player-turn_off_the_music-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn off the music + expect_changes: + media_player.outdoor_speakers: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: 'off' + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Turn off the music + context: + id: 01KHJR0Q2F885PJ96AG64A4A41 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:37.007358+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:37.013646+00:00 + - role: user + content: Turn off the music + attachments: null + created: 2026-02-16 08:10:37.007430+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c7d75e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:37.013659+00:00 + duration_ms: 133.178 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_music-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_music-1.yaml new file mode 100644 index 000000000..bdbe8e527 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_music-1.yaml @@ -0,0 +1,261 @@ +--- +uuid: 2883c6af-7e67-430b-8e3b-2c07c9848cd6 +task_id: home7_dk_media_player-turn_off_the_music-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn off the music + expect_changes: + media_player.outdoor_speakers: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: 'off' + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Turn off the music + context: + id: 01KHJR0QEH3RYZ4D1FF0ZFENVH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:37.393159+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:37.394739+00:00 + - role: user + content: Turn off the music + attachments: null + created: 2026-02-16 08:10:37.393263+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ddc7270>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:37.394752+00:00 + duration_ms: 42.839 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_music-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_music-2.yaml new file mode 100644 index 000000000..3e1637164 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_music-2.yaml @@ -0,0 +1,261 @@ +--- +uuid: 60e48b21-ce7f-488a-a5e5-069677a7c543 +task_id: home7_dk_media_player-turn_off_the_music-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn off the music + expect_changes: + media_player.outdoor_speakers: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: 'off' + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Turn off the music + context: + id: 01KHJR0QQQ368R2474F6T3RJVT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:37.687916+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:37.689460+00:00 + - role: user + content: Turn off the music + attachments: null + created: 2026-02-16 08:10:37.687988+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d271a60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:37.689472+00:00 + duration_ms: 42.042 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_music-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_music-3.yaml new file mode 100644 index 000000000..0fe31a414 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_music-3.yaml @@ -0,0 +1,261 @@ +--- +uuid: 62e870f3-9cd7-4a3f-b782-2815f8ac68b4 +task_id: home7_dk_media_player-turn_off_the_music-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn off the music + expect_changes: + media_player.outdoor_speakers: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: 'off' + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Turn off the music + context: + id: 01KHJR0R1FZJ0BT9ZH46MM4WYE + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:37.999432+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:38.001006+00:00 + - role: user + content: Turn off the music + attachments: null + created: 2026-02-16 08:10:37.999506+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e285640>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:38.001019+00:00 + duration_ms: 80.736 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_music-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_music-4.yaml new file mode 100644 index 000000000..104a3748a --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_music-4.yaml @@ -0,0 +1,261 @@ +--- +uuid: fe6a0ddf-2bc7-481a-8031-84877a08dfd1 +task_id: home7_dk_media_player-turn_off_the_music-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn off the music + expect_changes: + media_player.outdoor_speakers: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: 'off' + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Turn off the music + context: + id: 01KHJR0RBQZ4ZVD0A0PE9JQHGB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:38.327462+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:38.329007+00:00 + - role: user + content: Turn off the music + attachments: null + created: 2026-02-16 08:10:38.327534+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f14d0c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:38.329019+00:00 + duration_ms: 121.589 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_outdoor_speakers-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_outdoor_speakers-0.yaml new file mode 100644 index 000000000..5cc1c2dc5 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_outdoor_speakers-0.yaml @@ -0,0 +1,261 @@ +--- +uuid: df55d83b-b301-46e5-8d7a-24f58d3c6e89 +task_id: home7_dk_media_player-turn_off_the_outdoor_speakers-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn off the Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: 'off' + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Turn off the Outdoor Speakers + context: + id: 01KHJR0NHKVN4N3FQ3VY26WDS9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:35.443454+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:35.445033+00:00 + - role: user + content: Turn off the Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:35.443528+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f7eafb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:35.445044+00:00 + duration_ms: 41.778 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_outdoor_speakers-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_outdoor_speakers-1.yaml new file mode 100644 index 000000000..ec5a5a7fc --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_outdoor_speakers-1.yaml @@ -0,0 +1,261 @@ +--- +uuid: f2b7a173-2dd4-4987-ad03-3b865d14bdf7 +task_id: home7_dk_media_player-turn_off_the_outdoor_speakers-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn off the Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: 'off' + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Turn off the Outdoor Speakers + context: + id: 01KHJR0NTQ5PE9AKG3BCG249Z8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:35.735332+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:35.736868+00:00 + - role: user + content: Turn off the Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:35.735406+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16dac80f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:35.736881+00:00 + duration_ms: 120.166 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_outdoor_speakers-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_outdoor_speakers-2.yaml new file mode 100644 index 000000000..98d7c867d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_outdoor_speakers-2.yaml @@ -0,0 +1,261 @@ +--- +uuid: 6edddd7f-5285-4d79-9648-264ddbd54207 +task_id: home7_dk_media_player-turn_off_the_outdoor_speakers-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn off the Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: 'off' + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Turn off the Outdoor Speakers + context: + id: 01KHJR0P67MY30TENYF2W767PW + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:36.104029+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:36.105927+00:00 + - role: user + content: Turn off the Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:36.104102+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c9896f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:36.105938+00:00 + duration_ms: 42.721 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_outdoor_speakers-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_outdoor_speakers-3.yaml new file mode 100644 index 000000000..35751419e --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_outdoor_speakers-3.yaml @@ -0,0 +1,261 @@ +--- +uuid: 78697a0d-94cb-4099-8bb3-9c272ea73b3f +task_id: home7_dk_media_player-turn_off_the_outdoor_speakers-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn off the Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: 'off' + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Turn off the Outdoor Speakers + context: + id: 01KHJR0PG83GH9V5JDYCMGCJ8H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:36.424675+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:36.429353+00:00 + - role: user + content: Turn off the Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:36.424749+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16daf5bc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:36.429366+00:00 + duration_ms: 51.974 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_outdoor_speakers-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_outdoor_speakers-4.yaml new file mode 100644 index 000000000..d6b4ae6c4 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_off_the_outdoor_speakers-4.yaml @@ -0,0 +1,261 @@ +--- +uuid: b599295a-8978-4fb4-b5ab-9d4fff850e69 +task_id: home7_dk_media_player-turn_off_the_outdoor_speakers-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn off the Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: 'off' + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: 'off' + got: + state: playing + conversation_trace: + - event_type: async_process + data: + text: Turn off the Outdoor Speakers + context: + id: 01KHJR0PSHQXWC277PWB033Y1Z + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:36.721401+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:36.723032+00:00 + - role: user + content: Turn off the Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:36.721475+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f0e6560>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:36.723045+00:00 + duration_ms: 43.203 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_the_volume_down_to_50-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_the_volume_down_to_50-0.yaml new file mode 100644 index 000000000..b72bf26cc --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_the_volume_down_to_50-0.yaml @@ -0,0 +1,262 @@ +--- +uuid: 26bc6b29-beda-471a-a80d-6cd18b5c257e +task_id: home7_dk_media_player-turn_the_volume_down_to_50-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn the volume down to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Turn the volume down to 50% + context: + id: 01KHJR0GRJATB3SB64TNGNAPM9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:30.546588+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:30.549594+00:00 + - role: user + content: Turn the volume down to 50% + attachments: null + created: 2026-02-16 08:10:30.546659+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f55d0c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:30.549610+00:00 + duration_ms: 46.951 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_the_volume_down_to_50-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_the_volume_down_to_50-1.yaml new file mode 100644 index 000000000..9083882c7 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_the_volume_down_to_50-1.yaml @@ -0,0 +1,262 @@ +--- +uuid: eb4b477c-4be4-419f-b15c-06d674466bb7 +task_id: home7_dk_media_player-turn_the_volume_down_to_50-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn the volume down to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Turn the volume down to 50% + context: + id: 01KHJR0H34S7HQAJ0897F8MCP0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:30.884670+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:30.888203+00:00 + - role: user + content: Turn the volume down to 50% + attachments: null + created: 2026-02-16 08:10:30.884745+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f9538a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:30.888217+00:00 + duration_ms: 48.907 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_the_volume_down_to_50-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_the_volume_down_to_50-2.yaml new file mode 100644 index 000000000..85829d15b --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_the_volume_down_to_50-2.yaml @@ -0,0 +1,262 @@ +--- +uuid: 0d3ade38-827e-4253-9f2e-ced2f70e9c15 +task_id: home7_dk_media_player-turn_the_volume_down_to_50-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn the volume down to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Turn the volume down to 50% + context: + id: 01KHJR0HCVSHFC3HY89GZY7RZZ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:31.195257+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:31.196892+00:00 + - role: user + content: Turn the volume down to 50% + attachments: null + created: 2026-02-16 08:10:31.195334+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d33dc70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:31.196905+00:00 + duration_ms: 41.724 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_the_volume_down_to_50-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_the_volume_down_to_50-3.yaml new file mode 100644 index 000000000..e74811b4f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_the_volume_down_to_50-3.yaml @@ -0,0 +1,262 @@ +--- +uuid: 0772034a-b300-4b4b-919c-5a4558faf3de +task_id: home7_dk_media_player-turn_the_volume_down_to_50-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn the volume down to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Turn the volume down to 50% + context: + id: 01KHJR0HPBPTP7W6DKWJ888XB5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:31.499436+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:31.503643+00:00 + - role: user + content: Turn the volume down to 50% + attachments: null + created: 2026-02-16 08:10:31.499510+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d33dfe0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:31.503657+00:00 + duration_ms: 48.454 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_the_volume_down_to_50-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_the_volume_down_to_50-4.yaml new file mode 100644 index 000000000..4bf37fe30 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-turn_the_volume_down_to_50-4.yaml @@ -0,0 +1,262 @@ +--- +uuid: d4a30737-3cfd-42f5-90b5-ba4ee463735d +task_id: home7_dk_media_player-turn_the_volume_down_to_50-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Turn the volume down to 50% + expect_changes: + media_player.outdoor_speakers: + state: null + attributes: + volume_level: 0.5 +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + volume_level: 0.5 + got: + volume_level: 0.9 + conversation_trace: + - event_type: async_process + data: + text: Turn the volume down to 50% + context: + id: 01KHJR0J0C9F41JFPH8AES9AK3 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:31.821147+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:31.823436+00:00 + - role: user + content: Turn the volume down to 50% + attachments: null + created: 2026-02-16 08:10:31.821307+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16e065fe0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:31.823458+00:00 + duration_ms: 45.301 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_outdoor_speakers-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_outdoor_speakers-0.yaml new file mode 100644 index 000000000..01b8304b1 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_outdoor_speakers-0.yaml @@ -0,0 +1,261 @@ +--- +uuid: 4bf7bd4c-4881-4ada-93a7-84cede35592b +task_id: home7_dk_media_player-unpause_the_outdoor_speakers-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Unpause the Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Unpause the Outdoor Speakers + context: + id: 01KHJR04ZPV0HCB4BR83824QRA + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:18.486838+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:18.491093+00:00 + - role: user + content: Unpause the Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:18.486909+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16db1a8d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:18.491108+00:00 + duration_ms: 130.257 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_outdoor_speakers-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_outdoor_speakers-1.yaml new file mode 100644 index 000000000..d0e59de5f --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_outdoor_speakers-1.yaml @@ -0,0 +1,261 @@ +--- +uuid: bc42c281-919e-4f2b-ab4a-d821f77f1489 +task_id: home7_dk_media_player-unpause_the_outdoor_speakers-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Unpause the Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Unpause the Outdoor Speakers + context: + id: 01KHJR05C04BDVZJJ0PE75ZNF6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:18.880450+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:18.882107+00:00 + - role: user + content: Unpause the Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:18.880524+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17db6af00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:18.882120+00:00 + duration_ms: 132.635 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_outdoor_speakers-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_outdoor_speakers-2.yaml new file mode 100644 index 000000000..89b6416db --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_outdoor_speakers-2.yaml @@ -0,0 +1,261 @@ +--- +uuid: a3725996-fd34-4141-858f-cee1effb4ea3 +task_id: home7_dk_media_player-unpause_the_outdoor_speakers-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Unpause the Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Unpause the Outdoor Speakers + context: + id: 01KHJR05R6QGB9JJJGS69C5QZS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:19.270337+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:19.271852+00:00 + - role: user + content: Unpause the Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:19.270413+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16ef14a90>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:19.271864+00:00 + duration_ms: 39.192 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_outdoor_speakers-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_outdoor_speakers-3.yaml new file mode 100644 index 000000000..f1a38a49b --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_outdoor_speakers-3.yaml @@ -0,0 +1,261 @@ +--- +uuid: b4a632a1-e92b-46f8-a6b7-72273a941da3 +task_id: home7_dk_media_player-unpause_the_outdoor_speakers-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Unpause the Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Unpause the Outdoor Speakers + context: + id: 01KHJR06107BW4N55V986B4DWC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:19.552815+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:19.556045+00:00 + - role: user + content: Unpause the Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:19.552887+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16c5933d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:19.556059+00:00 + duration_ms: 43.727 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_outdoor_speakers-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_outdoor_speakers-4.yaml new file mode 100644 index 000000000..8942b9336 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_outdoor_speakers-4.yaml @@ -0,0 +1,261 @@ +--- +uuid: 125ca8af-382c-4b85-ae1b-5aed035c8eed +task_id: home7_dk_media_player-unpause_the_outdoor_speakers-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Unpause the Outdoor Speakers + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Unpause the Outdoor Speakers + context: + id: 01KHJR06AA95YP5R4G7ETZFK32 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:19.850644+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:19.853895+00:00 + - role: user + content: Unpause the Outdoor Speakers + attachments: null + created: 2026-02-16 08:10:19.850715+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d297ab0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:19.853908+00:00 + duration_ms: 42.197 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_rooftop_terrace_music-0.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_rooftop_terrace_music-0.yaml new file mode 100644 index 000000000..7cc110b01 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_rooftop_terrace_music-0.yaml @@ -0,0 +1,261 @@ +--- +uuid: f2da5fc3-74e7-4939-836b-306b47bdbd5f +task_id: home7_dk_media_player-unpause_the_rooftop_terrace_music-0 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Unpause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Unpause the Rooftop Terrace music + context: + id: 01KHJR06KK3BFV3G2E0GSZRFHY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:20.147868+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:20.150944+00:00 + - role: user + content: Unpause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:10:20.147940+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d6ddbc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:20.150959+00:00 + duration_ms: 138.719 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_rooftop_terrace_music-1.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_rooftop_terrace_music-1.yaml new file mode 100644 index 000000000..15bd55f32 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_rooftop_terrace_music-1.yaml @@ -0,0 +1,261 @@ +--- +uuid: 5c8b28f1-ae81-4d3f-9a12-b49f97d9c83e +task_id: home7_dk_media_player-unpause_the_rooftop_terrace_music-1 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Unpause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Unpause the Rooftop Terrace music + context: + id: 01KHJR070H1ZBN1ZCH4MXQSMH4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:20.561135+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:20.564370+00:00 + - role: user + content: Unpause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:10:20.561208+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17d2bbed0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:20.564384+00:00 + duration_ms: 142.121 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_rooftop_terrace_music-2.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_rooftop_terrace_music-2.yaml new file mode 100644 index 000000000..d1ce4e50d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_rooftop_terrace_music-2.yaml @@ -0,0 +1,261 @@ +--- +uuid: 54282ca0-531d-47e6-aefa-ca4623fbd0dd +task_id: home7_dk_media_player-unpause_the_rooftop_terrace_music-2 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Unpause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Unpause the Rooftop Terrace music + context: + id: 01KHJR07CJNMGX978STZ3F82QS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:20.947036+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:20.953220+00:00 + - role: user + content: Unpause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:10:20.947110+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f3d45c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:20.953247+00:00 + duration_ms: 130.439 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_rooftop_terrace_music-3.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_rooftop_terrace_music-3.yaml new file mode 100644 index 000000000..3a131d0a6 --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_rooftop_terrace_music-3.yaml @@ -0,0 +1,261 @@ +--- +uuid: 634b54fe-4bcf-4bbe-8720-c41558b0d06c +task_id: home7_dk_media_player-unpause_the_rooftop_terrace_music-3 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Unpause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Unpause the Rooftop Terrace music + context: + id: 01KHJR07SK6GCC2TES9EQDCPNH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:21.363684+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:21.374339+00:00 + - role: user + content: Unpause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:10:21.363758+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc16f660880>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:21.374356+00:00 + duration_ms: 48.076 + tries: 1 diff --git a/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_rooftop_terrace_music-4.yaml b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_rooftop_terrace_music-4.yaml new file mode 100644 index 000000000..2b913f39d --- /dev/null +++ b/reports/assist/2026.2.2/gemma-3-27b-it/home7_dk_media_player-unpause_the_rooftop_terrace_music-4.yaml @@ -0,0 +1,261 @@ +--- +uuid: b8aa7cc7-bfc1-44cd-92c6-c009005c4358 +task_id: home7_dk_media_player-unpause_the_rooftop_terrace_music-4 +model_id: gemma-3-27b-it +category: media-player +task: + input_text: Unpause the Rooftop Terrace music + expect_changes: + media_player.outdoor_speakers: + state: playing + attributes: null +response: Error talking to API +context: + unexpected_states: + media_player.outdoor_speakers: + expected: + state: playing + got: + state: paused + conversation_trace: + - event_type: async_process + data: + text: Unpause the Rooftop Terrace music + context: + id: 01KHJR082GGGN9HHQ8947NQGT1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:10:21.648887+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 1 Thermostat + domain: climate + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Humidity + domain: sensor + areas: Bedroom 1 + - names: Bedroom 1 Thermostat Temperature + domain: sensor + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 2 Thermostat + domain: climate + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Humidity + domain: sensor + areas: Bedroom 2 + - names: Bedroom 2 Thermostat Temperature + domain: sensor + areas: Bedroom 2 + - names: Coffee Maker + domain: switch + areas: Kitchen + - names: Coffee Maker Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Living Room Thermostat + domain: climate + areas: Living Room + - names: Living Room Thermostat Humidity + domain: sensor + areas: Living Room + - names: Living Room Thermostat Temperature + domain: sensor + areas: Living Room + - names: Outdoor Speakers + domain: media_player + areas: Rooftop Terrace + - names: Smart Curtain + domain: cover + areas: Living Room + - names: Smart Curtain Battery + domain: sensor + areas: Living Room + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + created: 2026-02-16 08:10:21.650524+00:00 + - role: user + content: Unpause the Rooftop Terrace music + attachments: null + created: 2026-02-16 08:10:21.648958+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7fc17ddac1a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7fc17c2bdf30>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''water'', ''gas'', ''outlet'', ''switch'', ''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''identify'', ''restart'', ''update''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:10:21.650537+00:00 + duration_ms: 131.636 + tries: 1 diff --git a/reports/assist/2026.2.2/report.csv b/reports/assist/2026.2.2/report.csv new file mode 100644 index 000000000..fcbcb64d6 --- /dev/null +++ b/reports/assist/2026.2.2/report.csv @@ -0,0 +1,461 @@ +task_id,model_id,category,text,tool_call,response,task_name,label,details +"dom1_pl_lights-kitchen_light_off-0","gemma-3-27b-it","light","Kitchen light off","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-kitchen_light_off-1","gemma-3-27b-it","light","Kitchen light off","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-kitchen_light_off-2","gemma-3-27b-it","light","Kitchen light off","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-kitchen_light_off-3","gemma-3-27b-it","light","Kitchen light off","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-kitchen_light_off-4","gemma-3-27b-it","light","Kitchen light off","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-please_turn_off_the_light-0","gemma-3-27b-it","light","Please turn off the light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-please_turn_off_the_light-1","gemma-3-27b-it","light","Please turn off the light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-please_turn_off_the_light-2","gemma-3-27b-it","light","Please turn off the light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-please_turn_off_the_light-3","gemma-3-27b-it","light","Please turn off the light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-please_turn_off_the_light-4","gemma-3-27b-it","light","Please turn off the light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-please_turn_on_the_kitchen_light-0","gemma-3-27b-it","light","Please turn on the kitchen light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}}" +"dom1_pl_lights-please_turn_on_the_kitchen_light-1","gemma-3-27b-it","light","Please turn on the kitchen light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}}" +"dom1_pl_lights-please_turn_on_the_kitchen_light-2","gemma-3-27b-it","light","Please turn on the kitchen light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}}" +"dom1_pl_lights-please_turn_on_the_kitchen_light-3","gemma-3-27b-it","light","Please turn on the kitchen light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}}" +"dom1_pl_lights-please_turn_on_the_kitchen_light-4","gemma-3-27b-it","light","Please turn on the kitchen light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.kitchen_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}}" +"dom1_pl_lights-shut_off_the_upstairs_lights_please-0","gemma-3-27b-it","light","Shut off the upstairs lights please","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_3_light': {'expected'...ate': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_4_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-shut_off_the_upstairs_lights_please-1","gemma-3-27b-it","light","Shut off the upstairs lights please","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_3_light': {'expected'...ate': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_4_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-shut_off_the_upstairs_lights_please-2","gemma-3-27b-it","light","Shut off the upstairs lights please","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_3_light': {'expected'...ate': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_4_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-shut_off_the_upstairs_lights_please-3","gemma-3-27b-it","light","Shut off the upstairs lights please","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_3_light': {'expected'...ate': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_4_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-shut_off_the_upstairs_lights_please-4","gemma-3-27b-it","light","Shut off the upstairs lights please","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_3_light': {'expected'...ate': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_4_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_off_the_light-0","gemma-3-27b-it","light","Turn off the light","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_lights-turn_off_the_light-1","gemma-3-27b-it","light","Turn off the light","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_lights-turn_off_the_light-2","gemma-3-27b-it","light","Turn off the light","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_lights-turn_off_the_light-3","gemma-3-27b-it","light","Turn off the light","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_lights-turn_off_the_light-4","gemma-3-27b-it","light","Turn off the light","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_lights-turn_off_the_lights_upstairs-0","gemma-3-27b-it","light","Turn off the lights upstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_3_light': {'expected'...ate': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_4_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_off_the_lights_upstairs-1","gemma-3-27b-it","light","Turn off the lights upstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_3_light': {'expected'...ate': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_4_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_off_the_lights_upstairs-2","gemma-3-27b-it","light","Turn off the lights upstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_3_light': {'expected'...ate': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_4_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_off_the_lights_upstairs-3","gemma-3-27b-it","light","Turn off the lights upstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_3_light': {'expected'...ate': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_4_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_off_the_lights_upstairs-4","gemma-3-27b-it","light","Turn off the lights upstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_3_light': {'expected'...ate': 'off'}, 'got': {'state': 'on'}}, 'light.bedroom_4_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-0","gemma-3-27b-it","light","Turn off the living room light then turn on the kitchen light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-1","gemma-3-27b-it","light","Turn off the living room light then turn on the kitchen light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-2","gemma-3-27b-it","light","Turn off the living room light then turn on the kitchen light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-3","gemma-3-27b-it","light","Turn off the living room light then turn on the kitchen light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-4","gemma-3-27b-it","light","Turn off the living room light then turn on the kitchen light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-0","gemma-3-27b-it","light","Turn on all the bedroom lights upstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_3_light': {'expected'...ate': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_4_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}}" +"dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-1","gemma-3-27b-it","light","Turn on all the bedroom lights upstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_3_light': {'expected'...ate': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_4_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}}" +"dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-2","gemma-3-27b-it","light","Turn on all the bedroom lights upstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_3_light': {'expected'...ate': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_4_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}}" +"dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-3","gemma-3-27b-it","light","Turn on all the bedroom lights upstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_3_light': {'expected'...ate': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_4_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}}" +"dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-4","gemma-3-27b-it","light","Turn on all the bedroom lights upstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_3_light': {'expected'...ate': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_4_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}}" +"dom1_pl_lights-turn_on_all_the_upstairs_lights-0","gemma-3-27b-it","light","Turn on all the upstairs lights","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_3_light': {'expected'...ate': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_4_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}}" +"dom1_pl_lights-turn_on_all_the_upstairs_lights-1","gemma-3-27b-it","light","Turn on all the upstairs lights","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_3_light': {'expected'...ate': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_4_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}}" +"dom1_pl_lights-turn_on_all_the_upstairs_lights-2","gemma-3-27b-it","light","Turn on all the upstairs lights","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_3_light': {'expected'...ate': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_4_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}}" +"dom1_pl_lights-turn_on_all_the_upstairs_lights-3","gemma-3-27b-it","light","Turn on all the upstairs lights","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_3_light': {'expected'...ate': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_4_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}}" +"dom1_pl_lights-turn_on_all_the_upstairs_lights-4","gemma-3-27b-it","light","Turn on all the upstairs lights","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.bedroom_2_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_3_light': {'expected'...ate': 'on'}, 'got': {'state': 'off'}}, 'light.bedroom_4_light': {'expected': {'state': 'on'}, 'got': {'state': 'off'}}}" +"dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-0","gemma-3-27b-it","light","Turn on both the kitchen light and living room lights","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-1","gemma-3-27b-it","light","Turn on both the kitchen light and living room lights","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-2","gemma-3-27b-it","light","Turn on both the kitchen light and living room lights","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-3","gemma-3-27b-it","light","Turn on both the kitchen light and living room lights","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-4","gemma-3-27b-it","light","Turn on both the kitchen light and living room lights","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-0","gemma-3-27b-it","light","Turn on the kitchen light and then turn off the living room light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-1","gemma-3-27b-it","light","Turn on the kitchen light and then turn off the living room light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-2","gemma-3-27b-it","light","Turn on the kitchen light and then turn off the living room light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-3","gemma-3-27b-it","light","Turn on the kitchen light and then turn off the living room light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-4","gemma-3-27b-it","light","Turn on the kitchen light and then turn off the living room light","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-0","gemma-3-27b-it","light","Turn on the light in the kitchen, then turn off the light in the living room","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-1","gemma-3-27b-it","light","Turn on the light in the kitchen, then turn off the light in the living room","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-2","gemma-3-27b-it","light","Turn on the light in the kitchen, then turn off the light in the living room","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-3","gemma-3-27b-it","light","Turn on the light in the kitchen, then turn off the light in the living room","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-4","gemma-3-27b-it","light","Turn on the light in the kitchen, then turn off the light in the living room","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'light.living_room_light': {'expected': {'state': 'off'}, 'got': {'state': 'on'}}}" +"dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-0","gemma-3-27b-it","light","Turn on the living room and kitchen lights","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-1","gemma-3-27b-it","light","Turn on the living room and kitchen lights","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-2","gemma-3-27b-it","light","Turn on the living room and kitchen lights","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-3","gemma-3-27b-it","light","Turn on the living room and kitchen lights","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-4","gemma-3-27b-it","light","Turn on the living room and kitchen lights","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_todo-add_apples_to_my_trader_joe_s_list-0","gemma-3-27b-it","todo","add apples to my trader joe's list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.trader_joe_s': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-add_apples_to_my_trader_joe_s_list-1","gemma-3-27b-it","todo","add apples to my trader joe's list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.trader_joe_s': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-add_apples_to_my_trader_joe_s_list-2","gemma-3-27b-it","todo","add apples to my trader joe's list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.trader_joe_s': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-add_apples_to_my_trader_joe_s_list-3","gemma-3-27b-it","todo","add apples to my trader joe's list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.trader_joe_s': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-add_apples_to_my_trader_joe_s_list-4","gemma-3-27b-it","todo","add apples to my trader joe's list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.trader_joe_s': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-add_history_homework_to_my_personal_tasks-0","gemma-3-27b-it","todo","Add history homework to my personal tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-add_history_homework_to_my_personal_tasks-1","gemma-3-27b-it","todo","Add history homework to my personal tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-add_history_homework_to_my_personal_tasks-2","gemma-3-27b-it","todo","Add history homework to my personal tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-add_history_homework_to_my_personal_tasks-3","gemma-3-27b-it","todo","Add history homework to my personal tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-add_history_homework_to_my_personal_tasks-4","gemma-3-27b-it","todo","Add history homework to my personal tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-add_history_homework_to_my_tasks-0","gemma-3-27b-it","todo","Add history homework to my tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-add_history_homework_to_my_tasks-1","gemma-3-27b-it","todo","Add history homework to my tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-add_history_homework_to_my_tasks-2","gemma-3-27b-it","todo","Add history homework to my tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-add_history_homework_to_my_tasks-3","gemma-3-27b-it","todo","Add history homework to my tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-add_history_homework_to_my_tasks-4","gemma-3-27b-it","todo","Add history homework to my tasks","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_apples_on_the_shopping_list-0","gemma-3-27b-it","todo","put apples on the shopping list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.trader_joe_s': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_apples_on_the_shopping_list-1","gemma-3-27b-it","todo","put apples on the shopping list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.trader_joe_s': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_apples_on_the_shopping_list-2","gemma-3-27b-it","todo","put apples on the shopping list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.trader_joe_s': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_apples_on_the_shopping_list-3","gemma-3-27b-it","todo","put apples on the shopping list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.trader_joe_s': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_apples_on_the_shopping_list-4","gemma-3-27b-it","todo","put apples on the shopping list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.trader_joe_s': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_apples_on_the_trader_joes_list-0","gemma-3-27b-it","todo","put apples on the trader joes list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.trader_joe_s': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_apples_on_the_trader_joes_list-1","gemma-3-27b-it","todo","put apples on the trader joes list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.trader_joe_s': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_apples_on_the_trader_joes_list-2","gemma-3-27b-it","todo","put apples on the trader joes list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.trader_joe_s': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_apples_on_the_trader_joes_list-3","gemma-3-27b-it","todo","put apples on the trader joes list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.trader_joe_s': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_apples_on_the_trader_joes_list-4","gemma-3-27b-it","todo","put apples on the trader joes list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.trader_joe_s': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-0","gemma-3-27b-it","todo","Put clean the kitchen on personal tasks list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-1","gemma-3-27b-it","todo","Put clean the kitchen on personal tasks list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-2","gemma-3-27b-it","todo","Put clean the kitchen on personal tasks list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-3","gemma-3-27b-it","todo","Put clean the kitchen on personal tasks list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-4","gemma-3-27b-it","todo","Put clean the kitchen on personal tasks list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_history_homework_on_my_task_list-0","gemma-3-27b-it","todo","Put history homework on my task list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_history_homework_on_my_task_list-1","gemma-3-27b-it","todo","Put history homework on my task list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_history_homework_on_my_task_list-2","gemma-3-27b-it","todo","Put history homework on my task list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_history_homework_on_my_task_list-3","gemma-3-27b-it","todo","Put history homework on my task list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_history_homework_on_my_task_list-4","gemma-3-27b-it","todo","Put history homework on my task list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_history_homework_on_personal_tasks_list-0","gemma-3-27b-it","todo","Put history homework on personal tasks list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_history_homework_on_personal_tasks_list-1","gemma-3-27b-it","todo","Put history homework on personal tasks list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_history_homework_on_personal_tasks_list-2","gemma-3-27b-it","todo","Put history homework on personal tasks list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_history_homework_on_personal_tasks_list-3","gemma-3-27b-it","todo","Put history homework on personal tasks list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_history_homework_on_personal_tasks_list-4","gemma-3-27b-it","todo","Put history homework on personal tasks list","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'todo.personal_tasks': {'expected': {'state': '1'}, 'got': {'state': '0'}}}" +"dom1_pl_todo-put_milk_on_the_list-0","gemma-3-27b-it","todo","put milk on the list","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_todo-put_milk_on_the_list-1","gemma-3-27b-it","todo","put milk on the list","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_todo-put_milk_on_the_list-2","gemma-3-27b-it","todo","put milk on the list","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_todo-put_milk_on_the_list-3","gemma-3-27b-it","todo","put milk on the list","","Error talking to API","eval-test_expected_states","Good","" +"dom1_pl_todo-put_milk_on_the_list-4","gemma-3-27b-it","todo","put milk on the list","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_cover_garage-close_the_garage_door-0","gemma-3-27b-it","cover","Close the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home1_us_cover_garage-close_the_garage_door-1","gemma-3-27b-it","cover","Close the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home1_us_cover_garage-close_the_garage_door-2","gemma-3-27b-it","cover","Close the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home1_us_cover_garage-close_the_garage_door-3","gemma-3-27b-it","cover","Close the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home1_us_cover_garage-close_the_garage_door-4","gemma-3-27b-it","cover","Close the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home1_us_cover_garage-open_the_garage_door-0","gemma-3-27b-it","cover","Open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_cover_garage-open_the_garage_door-1","gemma-3-27b-it","cover","Open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_cover_garage-open_the_garage_door-2","gemma-3-27b-it","cover","Open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_cover_garage-open_the_garage_door-3","gemma-3-27b-it","cover","Open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_cover_garage-open_the_garage_door-4","gemma-3-27b-it","cover","Open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_cover_garage-please_open_the_garage_door-0","gemma-3-27b-it","cover","Please open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_cover_garage-please_open_the_garage_door-1","gemma-3-27b-it","cover","Please open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_cover_garage-please_open_the_garage_door-2","gemma-3-27b-it","cover","Please open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_cover_garage-please_open_the_garage_door-3","gemma-3-27b-it","cover","Please open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_cover_garage-please_open_the_garage_door-4","gemma-3-27b-it","cover","Please open the garage door","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.garage_door_opener': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home1_us_smart_lock-lock_all_the_locks_please-0","gemma-3-27b-it","lock","Lock all the locks please","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-lock_all_the_locks_please-1","gemma-3-27b-it","lock","Lock all the locks please","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-lock_all_the_locks_please-2","gemma-3-27b-it","lock","Lock all the locks please","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-lock_all_the_locks_please-3","gemma-3-27b-it","lock","Lock all the locks please","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-lock_all_the_locks_please-4","gemma-3-27b-it","lock","Lock all the locks please","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-lock_the_door-0","gemma-3-27b-it","lock","Lock the door","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-lock_the_door-1","gemma-3-27b-it","lock","Lock the door","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-lock_the_door-2","gemma-3-27b-it","lock","Lock the door","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-lock_the_door-3","gemma-3-27b-it","lock","Lock the door","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-lock_the_door-4","gemma-3-27b-it","lock","Lock the door","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-lock_the_entry_lock-0","gemma-3-27b-it","lock","Lock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_smart_lock-lock_the_entry_lock-1","gemma-3-27b-it","lock","Lock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_smart_lock-lock_the_entry_lock-2","gemma-3-27b-it","lock","Lock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_smart_lock-lock_the_entry_lock-3","gemma-3-27b-it","lock","Lock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_smart_lock-lock_the_entry_lock-4","gemma-3-27b-it","lock","Lock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_smart_lock-lock_the_front_door_lock-0","gemma-3-27b-it","lock","Lock the front door lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_smart_lock-lock_the_front_door_lock-1","gemma-3-27b-it","lock","Lock the front door lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_smart_lock-lock_the_front_door_lock-2","gemma-3-27b-it","lock","Lock the front door lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_smart_lock-lock_the_front_door_lock-3","gemma-3-27b-it","lock","Lock the front door lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_smart_lock-lock_the_front_door_lock-4","gemma-3-27b-it","lock","Lock the front door lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_smart_lock-lock_the_lock-0","gemma-3-27b-it","lock","Lock the lock","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-lock_the_lock-1","gemma-3-27b-it","lock","Lock the lock","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-lock_the_lock-2","gemma-3-27b-it","lock","Lock the lock","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-lock_the_lock-3","gemma-3-27b-it","lock","Lock the lock","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-lock_the_lock-4","gemma-3-27b-it","lock","Lock the lock","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-lock_the_smart_lock-0","gemma-3-27b-it","lock","Lock the smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_smart_lock-lock_the_smart_lock-1","gemma-3-27b-it","lock","Lock the smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_smart_lock-lock_the_smart_lock-2","gemma-3-27b-it","lock","Lock the smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_smart_lock-lock_the_smart_lock-3","gemma-3-27b-it","lock","Lock the smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_smart_lock-lock_the_smart_lock-4","gemma-3-27b-it","lock","Lock the smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'locked'}, 'got': {'state': 'unlocked'}}}" +"home1_us_smart_lock-unlock_all_the_doors-0","gemma-3-27b-it","lock","Unlock all the doors","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-unlock_all_the_doors-1","gemma-3-27b-it","lock","Unlock all the doors","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-unlock_all_the_doors-2","gemma-3-27b-it","lock","Unlock all the doors","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-unlock_all_the_doors-3","gemma-3-27b-it","lock","Unlock all the doors","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-unlock_all_the_doors-4","gemma-3-27b-it","lock","Unlock all the doors","","Error talking to API","eval-test_expected_states","Good","" +"home1_us_smart_lock-unlock_the_entry_lock-0","gemma-3-27b-it","lock","Unlock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_smart_lock-unlock_the_entry_lock-1","gemma-3-27b-it","lock","Unlock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_smart_lock-unlock_the_entry_lock-2","gemma-3-27b-it","lock","Unlock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_smart_lock-unlock_the_entry_lock-3","gemma-3-27b-it","lock","Unlock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_smart_lock-unlock_the_entry_lock-4","gemma-3-27b-it","lock","Unlock the entry lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_smart_lock-unlock_the_smart_lock-0","gemma-3-27b-it","lock","Unlock the smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_smart_lock-unlock_the_smart_lock-1","gemma-3-27b-it","lock","Unlock the smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_smart_lock-unlock_the_smart_lock-2","gemma-3-27b-it","lock","Unlock the smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_smart_lock-unlock_the_smart_lock-3","gemma-3-27b-it","lock","Unlock the smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_smart_lock-unlock_the_smart_lock-4","gemma-3-27b-it","lock","Unlock the smart lock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'lock.smart_lock': {'expected': {'state': 'unlocked'}, 'got': {'state': 'locked'}}}" +"home1_us_vacuum-clean_the_living_room-0","gemma-3-27b-it","vacuum","Clean the living room","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-clean_the_living_room-1","gemma-3-27b-it","vacuum","Clean the living room","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-clean_the_living_room-2","gemma-3-27b-it","vacuum","Clean the living room","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-clean_the_living_room-3","gemma-3-27b-it","vacuum","Clean the living room","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-clean_the_living_room-4","gemma-3-27b-it","vacuum","Clean the living room","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-please_start_cleaning-0","gemma-3-27b-it","vacuum","Please start cleaning","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-please_start_cleaning-1","gemma-3-27b-it","vacuum","Please start cleaning","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-please_start_cleaning-2","gemma-3-27b-it","vacuum","Please start cleaning","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-please_start_cleaning-3","gemma-3-27b-it","vacuum","Please start cleaning","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-please_start_cleaning-4","gemma-3-27b-it","vacuum","Please start cleaning","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-return_roborock_downstairs_to_base-0","gemma-3-27b-it","vacuum","Return Roborock Downstairs to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-return_roborock_downstairs_to_base-1","gemma-3-27b-it","vacuum","Return Roborock Downstairs to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-return_roborock_downstairs_to_base-2","gemma-3-27b-it","vacuum","Return Roborock Downstairs to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-return_roborock_downstairs_to_base-3","gemma-3-27b-it","vacuum","Return Roborock Downstairs to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-return_roborock_downstairs_to_base-4","gemma-3-27b-it","vacuum","Return Roborock Downstairs to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-roborock_downstairs_return_to_base-0","gemma-3-27b-it","vacuum","Roborock Downstairs return to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-roborock_downstairs_return_to_base-1","gemma-3-27b-it","vacuum","Roborock Downstairs return to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-roborock_downstairs_return_to_base-2","gemma-3-27b-it","vacuum","Roborock Downstairs return to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-roborock_downstairs_return_to_base-3","gemma-3-27b-it","vacuum","Roborock Downstairs return to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-roborock_downstairs_return_to_base-4","gemma-3-27b-it","vacuum","Roborock Downstairs return to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-roborock_return_to_base-0","gemma-3-27b-it","vacuum","Roborock return to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-roborock_return_to_base-1","gemma-3-27b-it","vacuum","Roborock return to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-roborock_return_to_base-2","gemma-3-27b-it","vacuum","Roborock return to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-roborock_return_to_base-3","gemma-3-27b-it","vacuum","Roborock return to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-roborock_return_to_base-4","gemma-3-27b-it","vacuum","Roborock return to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-start_roborock-0","gemma-3-27b-it","vacuum","Start Roborock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_roborock-1","gemma-3-27b-it","vacuum","Start Roborock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_roborock-2","gemma-3-27b-it","vacuum","Start Roborock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_roborock-3","gemma-3-27b-it","vacuum","Start Roborock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_roborock-4","gemma-3-27b-it","vacuum","Start Roborock","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_roborock_downstairs-0","gemma-3-27b-it","vacuum","Start Roborock Downstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_roborock_downstairs-1","gemma-3-27b-it","vacuum","Start Roborock Downstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_roborock_downstairs-2","gemma-3-27b-it","vacuum","Start Roborock Downstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_roborock_downstairs-3","gemma-3-27b-it","vacuum","Start Roborock Downstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_roborock_downstairs-4","gemma-3-27b-it","vacuum","Start Roborock Downstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_roborock_downstairs_vacuum-0","gemma-3-27b-it","vacuum","Start Roborock Downstairs vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_roborock_downstairs_vacuum-1","gemma-3-27b-it","vacuum","Start Roborock Downstairs vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_roborock_downstairs_vacuum-2","gemma-3-27b-it","vacuum","Start Roborock Downstairs vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_roborock_downstairs_vacuum-3","gemma-3-27b-it","vacuum","Start Roborock Downstairs vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_roborock_downstairs_vacuum-4","gemma-3-27b-it","vacuum","Start Roborock Downstairs vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_the_vacuum-0","gemma-3-27b-it","vacuum","Start the vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_the_vacuum-1","gemma-3-27b-it","vacuum","Start the vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_the_vacuum-2","gemma-3-27b-it","vacuum","Start the vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_the_vacuum-3","gemma-3-27b-it","vacuum","Start the vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_the_vacuum-4","gemma-3-27b-it","vacuum","Start the vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_vacuuming-0","gemma-3-27b-it","vacuum","Start vacuuming","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_vacuuming-1","gemma-3-27b-it","vacuum","Start vacuuming","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_vacuuming-2","gemma-3-27b-it","vacuum","Start vacuuming","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_vacuuming-3","gemma-3-27b-it","vacuum","Start vacuuming","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-start_vacuuming-4","gemma-3-27b-it","vacuum","Start vacuuming","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-stop_roborock_downstairs-0","gemma-3-27b-it","vacuum","Stop Roborock Downstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_roborock_downstairs-1","gemma-3-27b-it","vacuum","Stop Roborock Downstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_roborock_downstairs-2","gemma-3-27b-it","vacuum","Stop Roborock Downstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_roborock_downstairs-3","gemma-3-27b-it","vacuum","Stop Roborock Downstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_roborock_downstairs-4","gemma-3-27b-it","vacuum","Stop Roborock Downstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_the_downstairs_vacuum-0","gemma-3-27b-it","vacuum","Stop the downstairs vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_the_downstairs_vacuum-1","gemma-3-27b-it","vacuum","Stop the downstairs vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_the_downstairs_vacuum-2","gemma-3-27b-it","vacuum","Stop the downstairs vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_the_downstairs_vacuum-3","gemma-3-27b-it","vacuum","Stop the downstairs vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_the_downstairs_vacuum-4","gemma-3-27b-it","vacuum","Stop the downstairs vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_the_vacuum-0","gemma-3-27b-it","vacuum","Stop the vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_the_vacuum-1","gemma-3-27b-it","vacuum","Stop the vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_the_vacuum-2","gemma-3-27b-it","vacuum","Stop the vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_the_vacuum-3","gemma-3-27b-it","vacuum","Stop the vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_the_vacuum-4","gemma-3-27b-it","vacuum","Stop the vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_vacuuming-0","gemma-3-27b-it","vacuum","Stop vacuuming","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_vacuuming-1","gemma-3-27b-it","vacuum","Stop vacuuming","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_vacuuming-2","gemma-3-27b-it","vacuum","Stop vacuuming","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_vacuuming-3","gemma-3-27b-it","vacuum","Stop vacuuming","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-stop_vacuuming-4","gemma-3-27b-it","vacuum","Stop vacuuming","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-turn_on_the_vacuum-0","gemma-3-27b-it","vacuum","Turn on the vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-turn_on_the_vacuum-1","gemma-3-27b-it","vacuum","Turn on the vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-turn_on_the_vacuum-2","gemma-3-27b-it","vacuum","Turn on the vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-turn_on_the_vacuum-3","gemma-3-27b-it","vacuum","Turn on the vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-turn_on_the_vacuum-4","gemma-3-27b-it","vacuum","Turn on the vacuum","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-vacuum_downstairs-0","gemma-3-27b-it","vacuum","Vacuum downstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-vacuum_downstairs-1","gemma-3-27b-it","vacuum","Vacuum downstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-vacuum_downstairs-2","gemma-3-27b-it","vacuum","Vacuum downstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-vacuum_downstairs-3","gemma-3-27b-it","vacuum","Vacuum downstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-vacuum_downstairs-4","gemma-3-27b-it","vacuum","Vacuum downstairs","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'cleaning'}, 'got': {'state': 'off'}}}" +"home1_us_vacuum-vacuum_return_to_base-0","gemma-3-27b-it","vacuum","Vacuum return to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-vacuum_return_to_base-1","gemma-3-27b-it","vacuum","Vacuum return to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-vacuum_return_to_base-2","gemma-3-27b-it","vacuum","Vacuum return to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-vacuum_return_to_base-3","gemma-3-27b-it","vacuum","Vacuum return to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home1_us_vacuum-vacuum_return_to_base-4","gemma-3-27b-it","vacuum","Vacuum return to base","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'vacuum.roborock_downstairs': {'expected': {'state': 'returning'}, 'got': {'state': 'cleaning'}}}" +"home2_ru_water_valve-close_the_front_yard_valve-0","gemma-3-27b-it","valve","close the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_water_valve-close_the_front_yard_valve-1","gemma-3-27b-it","valve","close the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_water_valve-close_the_front_yard_valve-2","gemma-3-27b-it","valve","close the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_water_valve-close_the_front_yard_valve-3","gemma-3-27b-it","valve","close the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_water_valve-close_the_front_yard_valve-4","gemma-3-27b-it","valve","close the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_water_valve-close_the_irrigation_valve-0","gemma-3-27b-it","valve","close the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_water_valve-close_the_irrigation_valve-1","gemma-3-27b-it","valve","close the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_water_valve-close_the_irrigation_valve-2","gemma-3-27b-it","valve","close the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_water_valve-close_the_irrigation_valve-3","gemma-3-27b-it","valve","close the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_water_valve-close_the_irrigation_valve-4","gemma-3-27b-it","valve","close the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_water_valve-open_the_front_yard_valve-0","gemma-3-27b-it","valve","open the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-open_the_front_yard_valve-1","gemma-3-27b-it","valve","open the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-open_the_front_yard_valve-2","gemma-3-27b-it","valve","open the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-open_the_front_yard_valve-3","gemma-3-27b-it","valve","open the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-open_the_front_yard_valve-4","gemma-3-27b-it","valve","open the front yard valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-open_the_irrigation_valve-0","gemma-3-27b-it","valve","open the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-open_the_irrigation_valve-1","gemma-3-27b-it","valve","open the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-open_the_irrigation_valve-2","gemma-3-27b-it","valve","open the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-open_the_irrigation_valve-3","gemma-3-27b-it","valve","open the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-open_the_irrigation_valve-4","gemma-3-27b-it","valve","open the irrigation valve","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-set_the_irrigation_valve_to_50-0","gemma-3-27b-it","valve","set the irrigation valve to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 50, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-set_the_irrigation_valve_to_50-1","gemma-3-27b-it","valve","set the irrigation valve to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 50, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-set_the_irrigation_valve_to_50-2","gemma-3-27b-it","valve","set the irrigation valve to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 50, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-set_the_irrigation_valve_to_50-3","gemma-3-27b-it","valve","set the irrigation valve to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 50, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-set_the_irrigation_valve_to_50-4","gemma-3-27b-it","valve","set the irrigation valve to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 50, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-turn_off_the_water_in_the_front_yard-0","gemma-3-27b-it","valve","turn off the water in the front yard","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_water_valve-turn_off_the_water_in_the_front_yard-1","gemma-3-27b-it","valve","turn off the water in the front yard","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_water_valve-turn_off_the_water_in_the_front_yard-2","gemma-3-27b-it","valve","turn off the water in the front yard","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_water_valve-turn_off_the_water_in_the_front_yard-3","gemma-3-27b-it","valve","turn off the water in the front yard","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_water_valve-turn_off_the_water_in_the_front_yard-4","gemma-3-27b-it","valve","turn off the water in the front yard","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 0, 'state': 'closed'}, 'got': {'current_position': 100, 'state': 'open'}}}" +"home2_ru_water_valve-turn_on_the_water_in_the_front_yard-0","gemma-3-27b-it","valve","turn on the water in the front yard","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-turn_on_the_water_in_the_front_yard-1","gemma-3-27b-it","valve","turn on the water in the front yard","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-turn_on_the_water_in_the_front_yard-2","gemma-3-27b-it","valve","turn on the water in the front yard","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-turn_on_the_water_in_the_front_yard-3","gemma-3-27b-it","valve","turn on the water in the front yard","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-turn_on_the_water_in_the_front_yard-4","gemma-3-27b-it","valve","turn on the water in the front yard","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-water_the_front_yard-0","gemma-3-27b-it","valve","water the front yard","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-water_the_front_yard-1","gemma-3-27b-it","valve","water the front yard","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-water_the_front_yard-2","gemma-3-27b-it","valve","water the front yard","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-water_the_front_yard-3","gemma-3-27b-it","valve","water the front yard","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-water_the_front_yard-4","gemma-3-27b-it","valve","water the front yard","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-water_the_garden-0","gemma-3-27b-it","valve","water the garden","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-water_the_garden-1","gemma-3-27b-it","valve","water the garden","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-water_the_garden-2","gemma-3-27b-it","valve","water the garden","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-water_the_garden-3","gemma-3-27b-it","valve","water the garden","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home2_ru_water_valve-water_the_garden-4","gemma-3-27b-it","valve","water the garden","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'valve.irrigation_valve': {'expected': {'current_position': 100, 'state': 'open'}, 'got': {'current_position': 0, 'state': 'closed'}}}" +"home5_cn_fan-turn_off_the_bedroom_fan-0","gemma-3-27b-it","fan","Turn off the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan-turn_off_the_bedroom_fan-1","gemma-3-27b-it","fan","Turn off the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan-turn_off_the_bedroom_fan-2","gemma-3-27b-it","fan","Turn off the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan-turn_off_the_bedroom_fan-3","gemma-3-27b-it","fan","Turn off the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan-turn_off_the_bedroom_fan-4","gemma-3-27b-it","fan","Turn off the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan-turn_off_the_fan-0","gemma-3-27b-it","fan","Turn off the fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan-turn_off_the_fan-1","gemma-3-27b-it","fan","Turn off the fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan-turn_off_the_fan-2","gemma-3-27b-it","fan","Turn off the fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan-turn_off_the_fan-3","gemma-3-27b-it","fan","Turn off the fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan-turn_off_the_fan-4","gemma-3-27b-it","fan","Turn off the fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 0, 'state': 'off'}, 'got': {'percentage': 100, 'state': 'on'}}}" +"home5_cn_fan-turn_on_the_bedroom_1_fan-0","gemma-3-27b-it","fan","Turn on the bedroom 1 fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_bedroom_1_fan-1","gemma-3-27b-it","fan","Turn on the bedroom 1 fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_bedroom_1_fan-2","gemma-3-27b-it","fan","Turn on the bedroom 1 fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_bedroom_1_fan-3","gemma-3-27b-it","fan","Turn on the bedroom 1 fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_bedroom_1_fan-4","gemma-3-27b-it","fan","Turn on the bedroom 1 fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_bedroom_fan-0","gemma-3-27b-it","fan","Turn on the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_bedroom_fan-1","gemma-3-27b-it","fan","Turn on the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_bedroom_fan-2","gemma-3-27b-it","fan","Turn on the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_bedroom_fan-3","gemma-3-27b-it","fan","Turn on the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_bedroom_fan-4","gemma-3-27b-it","fan","Turn on the bedroom fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_fan-0","gemma-3-27b-it","fan","Turn on the fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_fan-1","gemma-3-27b-it","fan","Turn on the fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_fan-2","gemma-3-27b-it","fan","Turn on the fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_fan-3","gemma-3-27b-it","fan","Turn on the fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_fan-4","gemma-3-27b-it","fan","Turn on the fan","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_fan_in_the_bedroom-0","gemma-3-27b-it","fan","Turn on the fan in the bedroom","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_fan_in_the_bedroom-1","gemma-3-27b-it","fan","Turn on the fan in the bedroom","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_fan_in_the_bedroom-2","gemma-3-27b-it","fan","Turn on the fan in the bedroom","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_fan_in_the_bedroom-3","gemma-3-27b-it","fan","Turn on the fan in the bedroom","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home5_cn_fan-turn_on_the_fan_in_the_bedroom-4","gemma-3-27b-it","fan","Turn on the fan in the bedroom","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'fan.bedroom_fan': {'expected': {'percentage': 100, 'state': 'on'}, 'got': {'percentage': 0, 'state': 'off'}}}" +"home7_dk_cover_curtain-close_the_curtains-0","gemma-3-27b-it","cover","Close the curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain-close_the_curtains-1","gemma-3-27b-it","cover","Close the curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain-close_the_curtains-2","gemma-3-27b-it","cover","Close the curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain-close_the_curtains-3","gemma-3-27b-it","cover","Close the curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain-close_the_curtains-4","gemma-3-27b-it","cover","Close the curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain-close_the_living_room_curtain-0","gemma-3-27b-it","cover","Close the living room curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain-close_the_living_room_curtain-1","gemma-3-27b-it","cover","Close the living room curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain-close_the_living_room_curtain-2","gemma-3-27b-it","cover","Close the living room curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain-close_the_living_room_curtain-3","gemma-3-27b-it","cover","Close the living room curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain-close_the_living_room_curtain-4","gemma-3-27b-it","cover","Close the living room curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain-close_the_smart_curtains-0","gemma-3-27b-it","cover","Close the smart curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain-close_the_smart_curtains-1","gemma-3-27b-it","cover","Close the smart curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain-close_the_smart_curtains-2","gemma-3-27b-it","cover","Close the smart curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain-close_the_smart_curtains-3","gemma-3-27b-it","cover","Close the smart curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain-close_the_smart_curtains-4","gemma-3-27b-it","cover","Close the smart curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'closed'}, 'got': {'state': 'open'}}}" +"home7_dk_cover_curtain-open_the_living_room_curtains-0","gemma-3-27b-it","cover","Open the living room curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-open_the_living_room_curtains-1","gemma-3-27b-it","cover","Open the living room curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-open_the_living_room_curtains-2","gemma-3-27b-it","cover","Open the living room curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-open_the_living_room_curtains-3","gemma-3-27b-it","cover","Open the living room curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-open_the_living_room_curtains-4","gemma-3-27b-it","cover","Open the living room curtains","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-open_the_living_room_smart_curtain-0","gemma-3-27b-it","cover","Open the living room smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-open_the_living_room_smart_curtain-1","gemma-3-27b-it","cover","Open the living room smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-open_the_living_room_smart_curtain-2","gemma-3-27b-it","cover","Open the living room smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-open_the_living_room_smart_curtain-3","gemma-3-27b-it","cover","Open the living room smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-open_the_living_room_smart_curtain-4","gemma-3-27b-it","cover","Open the living room smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-open_the_smart_curtain-0","gemma-3-27b-it","cover","Open the smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-open_the_smart_curtain-1","gemma-3-27b-it","cover","Open the smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-open_the_smart_curtain-2","gemma-3-27b-it","cover","Open the smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-open_the_smart_curtain-3","gemma-3-27b-it","cover","Open the smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-open_the_smart_curtain-4","gemma-3-27b-it","cover","Open the smart curtain","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-set_the_living_room_curtains_to_50-0","gemma-3-27b-it","cover","Set the living room curtains to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-set_the_living_room_curtains_to_50-1","gemma-3-27b-it","cover","Set the living room curtains to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-set_the_living_room_curtains_to_50-2","gemma-3-27b-it","cover","Set the living room curtains to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-set_the_living_room_curtains_to_50-3","gemma-3-27b-it","cover","Set the living room curtains to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-set_the_living_room_curtains_to_50-4","gemma-3-27b-it","cover","Set the living room curtains to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-set_the_smart_curtain_to_50-0","gemma-3-27b-it","cover","Set the smart curtain to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-set_the_smart_curtain_to_50-1","gemma-3-27b-it","cover","Set the smart curtain to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-set_the_smart_curtain_to_50-2","gemma-3-27b-it","cover","Set the smart curtain to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-set_the_smart_curtain_to_50-3","gemma-3-27b-it","cover","Set the smart curtain to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_cover_curtain-set_the_smart_curtain_to_50-4","gemma-3-27b-it","cover","Set the smart curtain to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'cover.smart_curtain': {'expected': {'state': 'open'}, 'got': {'state': 'closed'}}}" +"home7_dk_media_player-mute_the_music-0","gemma-3-27b-it","media-player","Mute the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.0}, 'got': {'volume_level': 0.6}}}" +"home7_dk_media_player-mute_the_music-1","gemma-3-27b-it","media-player","Mute the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.0}, 'got': {'volume_level': 0.6}}}" +"home7_dk_media_player-mute_the_music-2","gemma-3-27b-it","media-player","Mute the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.0}, 'got': {'volume_level': 0.6}}}" +"home7_dk_media_player-mute_the_music-3","gemma-3-27b-it","media-player","Mute the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.0}, 'got': {'volume_level': 0.6}}}" +"home7_dk_media_player-mute_the_music-4","gemma-3-27b-it","media-player","Mute the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.0}, 'got': {'volume_level': 0.6}}}" +"home7_dk_media_player-mute_the_outdoor_speakers-0","gemma-3-27b-it","media-player","Mute the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.0}, 'got': {'volume_level': 0.6}}}" +"home7_dk_media_player-mute_the_outdoor_speakers-1","gemma-3-27b-it","media-player","Mute the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.0}, 'got': {'volume_level': 0.6}}}" +"home7_dk_media_player-mute_the_outdoor_speakers-2","gemma-3-27b-it","media-player","Mute the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.0}, 'got': {'volume_level': 0.6}}}" +"home7_dk_media_player-mute_the_outdoor_speakers-3","gemma-3-27b-it","media-player","Mute the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.0}, 'got': {'volume_level': 0.6}}}" +"home7_dk_media_player-mute_the_outdoor_speakers-4","gemma-3-27b-it","media-player","Mute the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.0}, 'got': {'volume_level': 0.6}}}" +"home7_dk_media_player-next_song-0","gemma-3-27b-it","media-player","Next song","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-next_song-1","gemma-3-27b-it","media-player","Next song","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-next_song-2","gemma-3-27b-it","media-player","Next song","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-next_song-3","gemma-3-27b-it","media-player","Next song","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-next_song-4","gemma-3-27b-it","media-player","Next song","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-pause_outdoor_speakers-0","gemma-3-27b-it","media-player","Pause Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_outdoor_speakers-1","gemma-3-27b-it","media-player","Pause Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_outdoor_speakers-2","gemma-3-27b-it","media-player","Pause Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_outdoor_speakers-3","gemma-3-27b-it","media-player","Pause Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_outdoor_speakers-4","gemma-3-27b-it","media-player","Pause Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_the_music-0","gemma-3-27b-it","media-player","Pause the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_the_music-1","gemma-3-27b-it","media-player","Pause the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_the_music-2","gemma-3-27b-it","media-player","Pause the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_the_music-3","gemma-3-27b-it","media-player","Pause the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_the_music-4","gemma-3-27b-it","media-player","Pause the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_the_music_outside-0","gemma-3-27b-it","media-player","Pause the music outside","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_the_music_outside-1","gemma-3-27b-it","media-player","Pause the music outside","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_the_music_outside-2","gemma-3-27b-it","media-player","Pause the music outside","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_the_music_outside-3","gemma-3-27b-it","media-player","Pause the music outside","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_the_music_outside-4","gemma-3-27b-it","media-player","Pause the music outside","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_the_rooftop_terrace_music-0","gemma-3-27b-it","media-player","Pause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_the_rooftop_terrace_music-1","gemma-3-27b-it","media-player","Pause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_the_rooftop_terrace_music-2","gemma-3-27b-it","media-player","Pause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_the_rooftop_terrace_music-3","gemma-3-27b-it","media-player","Pause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-pause_the_rooftop_terrace_music-4","gemma-3-27b-it","media-player","Pause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'paused'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-resume_outdoor_speakers-0","gemma-3-27b-it","media-player","Resume Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-resume_outdoor_speakers-1","gemma-3-27b-it","media-player","Resume Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-resume_outdoor_speakers-2","gemma-3-27b-it","media-player","Resume Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-resume_outdoor_speakers-3","gemma-3-27b-it","media-player","Resume Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-resume_outdoor_speakers-4","gemma-3-27b-it","media-player","Resume Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-resume_playing_the_music-0","gemma-3-27b-it","media-player","Resume playing the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-resume_playing_the_music-1","gemma-3-27b-it","media-player","Resume playing the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-resume_playing_the_music-2","gemma-3-27b-it","media-player","Resume playing the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-resume_playing_the_music-3","gemma-3-27b-it","media-player","Resume playing the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-resume_playing_the_music-4","gemma-3-27b-it","media-player","Resume playing the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-set_outdoor_speakers_volume_to_50-0","gemma-3-27b-it","media-player","Set outdoor speakers volume to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player-set_outdoor_speakers_volume_to_50-1","gemma-3-27b-it","media-player","Set outdoor speakers volume to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player-set_outdoor_speakers_volume_to_50-2","gemma-3-27b-it","media-player","Set outdoor speakers volume to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player-set_outdoor_speakers_volume_to_50-3","gemma-3-27b-it","media-player","Set outdoor speakers volume to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player-set_outdoor_speakers_volume_to_50-4","gemma-3-27b-it","media-player","Set outdoor speakers volume to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player-skip_song_on_outdoor_speakers-0","gemma-3-27b-it","media-player","Skip song on outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-skip_song_on_outdoor_speakers-1","gemma-3-27b-it","media-player","Skip song on outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-skip_song_on_outdoor_speakers-2","gemma-3-27b-it","media-player","Skip song on outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-skip_song_on_outdoor_speakers-3","gemma-3-27b-it","media-player","Skip song on outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-skip_song_on_outdoor_speakers-4","gemma-3-27b-it","media-player","Skip song on outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-skip_to_the_next_track-0","gemma-3-27b-it","media-player","Skip to the next track","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-skip_to_the_next_track-1","gemma-3-27b-it","media-player","Skip to the next track","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-skip_to_the_next_track-2","gemma-3-27b-it","media-player","Skip to the next track","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-skip_to_the_next_track-3","gemma-3-27b-it","media-player","Skip to the next track","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-skip_to_the_next_track-4","gemma-3-27b-it","media-player","Skip to the next track","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-0","gemma-3-27b-it","media-player","Skip to the next track on the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-1","gemma-3-27b-it","media-player","Skip to the next track on the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-2","gemma-3-27b-it","media-player","Skip to the next track on the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-3","gemma-3-27b-it","media-player","Skip to the next track on the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-4","gemma-3-27b-it","media-player","Skip to the next track on the outdoor speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'media_track': 2}, 'got': {'media_track': 1}}}" +"home7_dk_media_player-turn_off_the_music-0","gemma-3-27b-it","media-player","Turn off the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'off'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-turn_off_the_music-1","gemma-3-27b-it","media-player","Turn off the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'off'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-turn_off_the_music-2","gemma-3-27b-it","media-player","Turn off the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'off'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-turn_off_the_music-3","gemma-3-27b-it","media-player","Turn off the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'off'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-turn_off_the_music-4","gemma-3-27b-it","media-player","Turn off the music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'off'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-turn_off_the_outdoor_speakers-0","gemma-3-27b-it","media-player","Turn off the Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'off'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-turn_off_the_outdoor_speakers-1","gemma-3-27b-it","media-player","Turn off the Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'off'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-turn_off_the_outdoor_speakers-2","gemma-3-27b-it","media-player","Turn off the Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'off'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-turn_off_the_outdoor_speakers-3","gemma-3-27b-it","media-player","Turn off the Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'off'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-turn_off_the_outdoor_speakers-4","gemma-3-27b-it","media-player","Turn off the Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'off'}, 'got': {'state': 'playing'}}}" +"home7_dk_media_player-turn_the_volume_down_to_50-0","gemma-3-27b-it","media-player","Turn the volume down to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player-turn_the_volume_down_to_50-1","gemma-3-27b-it","media-player","Turn the volume down to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player-turn_the_volume_down_to_50-2","gemma-3-27b-it","media-player","Turn the volume down to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player-turn_the_volume_down_to_50-3","gemma-3-27b-it","media-player","Turn the volume down to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player-turn_the_volume_down_to_50-4","gemma-3-27b-it","media-player","Turn the volume down to 50%","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'volume_level': 0.5}, 'got': {'volume_level': 0.9}}}" +"home7_dk_media_player-unpause_the_outdoor_speakers-0","gemma-3-27b-it","media-player","Unpause the Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-unpause_the_outdoor_speakers-1","gemma-3-27b-it","media-player","Unpause the Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-unpause_the_outdoor_speakers-2","gemma-3-27b-it","media-player","Unpause the Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-unpause_the_outdoor_speakers-3","gemma-3-27b-it","media-player","Unpause the Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-unpause_the_outdoor_speakers-4","gemma-3-27b-it","media-player","Unpause the Outdoor Speakers","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-unpause_the_rooftop_terrace_music-0","gemma-3-27b-it","media-player","Unpause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-unpause_the_rooftop_terrace_music-1","gemma-3-27b-it","media-player","Unpause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-unpause_the_rooftop_terrace_music-2","gemma-3-27b-it","media-player","Unpause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-unpause_the_rooftop_terrace_music-3","gemma-3-27b-it","media-player","Unpause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" +"home7_dk_media_player-unpause_the_rooftop_terrace_music-4","gemma-3-27b-it","media-player","Unpause the Rooftop Terrace music","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'media_player.outdoor_speakers': {'expected': {'state': 'playing'}, 'got': {'state': 'paused'}}}" diff --git a/reports/assist/2026.2.2/reports-by-category.yaml b/reports/assist/2026.2.2/reports-by-category.yaml new file mode 100644 index 000000000..b214267a9 --- /dev/null +++ b/reports/assist/2026.2.2/reports-by-category.yaml @@ -0,0 +1,42 @@ +--- +- category: cover + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 55 +- category: fan + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 30 +- category: light + good_percent: 23.1% + confidence_interval: 10.2% + good: 15 + total: 65 +- category: lock + good_percent: 44.4% + confidence_interval: 14.5% + good: 20 + total: 45 +- category: media-player + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 90 +- category: todo + good_percent: 11.1% + confidence_interval: 9.2% + good: 5 + total: 45 +- category: vacuum + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 85 +- category: valve + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 45 + diff --git a/reports/assist/2026.2.2/reports-by-model-category.yaml b/reports/assist/2026.2.2/reports-by-model-category.yaml new file mode 100644 index 000000000..bc0f10426 --- /dev/null +++ b/reports/assist/2026.2.2/reports-by-model-category.yaml @@ -0,0 +1,42 @@ +--- +- model_id-category: gemma-3-27b-it-cover + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 55 +- model_id-category: gemma-3-27b-it-fan + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 30 +- model_id-category: gemma-3-27b-it-light + good_percent: 23.1% + confidence_interval: 10.2% + good: 15 + total: 65 +- model_id-category: gemma-3-27b-it-lock + good_percent: 44.4% + confidence_interval: 14.5% + good: 20 + total: 45 +- model_id-category: gemma-3-27b-it-media-player + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 90 +- model_id-category: gemma-3-27b-it-todo + good_percent: 11.1% + confidence_interval: 9.2% + good: 5 + total: 45 +- model_id-category: gemma-3-27b-it-vacuum + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 85 +- model_id-category: gemma-3-27b-it-valve + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 45 + diff --git a/reports/assist/2026.2.2/reports-by-model-test-name.yaml b/reports/assist/2026.2.2/reports-by-model-test-name.yaml new file mode 100644 index 000000000..43cec15b5 --- /dev/null +++ b/reports/assist/2026.2.2/reports-by-model-test-name.yaml @@ -0,0 +1,22 @@ +--- +- model_id-task_name: gemma-3-27b-it-eval-test_expect_llm_tool_call_args + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- model_id-task_name: gemma-3-27b-it-eval-test_expect_llm_tool_call_name + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- model_id-task_name: gemma-3-27b-it-eval-test_expect_response + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- model_id-task_name: gemma-3-27b-it-eval-test_expected_states + good_percent: 8.7% + confidence_interval: 2.6% + good: 40 + total: 460 + diff --git a/reports/assist/2026.2.2/reports-by-task-id.yaml b/reports/assist/2026.2.2/reports-by-task-id.yaml new file mode 100644 index 000000000..45191d191 --- /dev/null +++ b/reports/assist/2026.2.2/reports-by-task-id.yaml @@ -0,0 +1,2327 @@ +--- +- task_id: dom1_pl_lights-kitchen_light_off-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-kitchen_light_off-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-kitchen_light_off-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-kitchen_light_off-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-kitchen_light_off-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-please_turn_off_the_light-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-please_turn_off_the_light-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-please_turn_off_the_light-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-please_turn_off_the_light-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-please_turn_off_the_light-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-please_turn_on_the_kitchen_light-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-please_turn_on_the_kitchen_light-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-please_turn_on_the_kitchen_light-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-please_turn_on_the_kitchen_light-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-please_turn_on_the_kitchen_light-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-shut_off_the_upstairs_lights_please-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-shut_off_the_upstairs_lights_please-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-shut_off_the_upstairs_lights_please-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-shut_off_the_upstairs_lights_please-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-shut_off_the_upstairs_lights_please-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_off_the_light-0 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_lights-turn_off_the_light-1 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_lights-turn_off_the_light-2 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_lights-turn_off_the_light-3 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_lights-turn_off_the_light-4 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_lights-turn_off_the_lights_upstairs-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_off_the_lights_upstairs-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_off_the_lights_upstairs-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_off_the_lights_upstairs-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_off_the_lights_upstairs-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_off_the_living_room_light_then_turn_on_the_kitchen_light-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_all_the_bedroom_lights_upstairs-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_all_the_upstairs_lights-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_all_the_upstairs_lights-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_all_the_upstairs_lights-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_all_the_upstairs_lights-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_all_the_upstairs_lights-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-0 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-1 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-2 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-3 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_lights-turn_on_both_the_kitchen_light_and_living_room_lights-4 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_the_kitchen_light_and_then_turn_off_the_living_room_light-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_the_light_in_the_kitchen_then_turn_off_the_light_in_the_living_room-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-0 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-1 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-2 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-3 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_lights-turn_on_the_living_room_and_kitchen_lights-4 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_todo-add_apples_to_my_trader_joe_s_list-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-add_apples_to_my_trader_joe_s_list-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-add_apples_to_my_trader_joe_s_list-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-add_apples_to_my_trader_joe_s_list-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-add_apples_to_my_trader_joe_s_list-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-add_history_homework_to_my_personal_tasks-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-add_history_homework_to_my_personal_tasks-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-add_history_homework_to_my_personal_tasks-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-add_history_homework_to_my_personal_tasks-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-add_history_homework_to_my_personal_tasks-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-add_history_homework_to_my_tasks-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-add_history_homework_to_my_tasks-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-add_history_homework_to_my_tasks-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-add_history_homework_to_my_tasks-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-add_history_homework_to_my_tasks-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_apples_on_the_shopping_list-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_apples_on_the_shopping_list-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_apples_on_the_shopping_list-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_apples_on_the_shopping_list-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_apples_on_the_shopping_list-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_apples_on_the_trader_joes_list-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_apples_on_the_trader_joes_list-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_apples_on_the_trader_joes_list-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_apples_on_the_trader_joes_list-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_apples_on_the_trader_joes_list-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_clean_the_kitchen_on_personal_tasks_list-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_history_homework_on_my_task_list-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_history_homework_on_my_task_list-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_history_homework_on_my_task_list-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_history_homework_on_my_task_list-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_history_homework_on_my_task_list-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_history_homework_on_personal_tasks_list-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_history_homework_on_personal_tasks_list-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_history_homework_on_personal_tasks_list-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_history_homework_on_personal_tasks_list-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_history_homework_on_personal_tasks_list-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: dom1_pl_todo-put_milk_on_the_list-0 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_todo-put_milk_on_the_list-1 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_todo-put_milk_on_the_list-2 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_todo-put_milk_on_the_list-3 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: dom1_pl_todo-put_milk_on_the_list-4 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_cover_garage-close_the_garage_door-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage-close_the_garage_door-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage-close_the_garage_door-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage-close_the_garage_door-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage-close_the_garage_door-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage-open_the_garage_door-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage-open_the_garage_door-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage-open_the_garage_door-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage-open_the_garage_door-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage-open_the_garage_door-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage-please_open_the_garage_door-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage-please_open_the_garage_door-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage-please_open_the_garage_door-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage-please_open_the_garage_door-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_cover_garage-please_open_the_garage_door-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_media_player-set_the_volume_to_0-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- task_id: home1_us_media_player-set_the_volume_to_0-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- task_id: home1_us_media_player-set_the_volume_to_0-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- task_id: home1_us_media_player-set_the_volume_to_0-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- task_id: home1_us_media_player-set_the_volume_to_0-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- task_id: home1_us_smart_lock-lock_all_the_locks_please-0 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-lock_all_the_locks_please-1 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-lock_all_the_locks_please-2 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-lock_all_the_locks_please-3 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-lock_all_the_locks_please-4 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-lock_the_door-0 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-lock_the_door-1 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-lock_the_door-2 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-lock_the_door-3 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-lock_the_door-4 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-lock_the_entry_lock-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-lock_the_entry_lock-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-lock_the_entry_lock-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-lock_the_entry_lock-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-lock_the_entry_lock-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-lock_the_front_door_lock-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-lock_the_front_door_lock-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-lock_the_front_door_lock-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-lock_the_front_door_lock-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-lock_the_front_door_lock-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-lock_the_lock-0 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-lock_the_lock-1 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-lock_the_lock-2 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-lock_the_lock-3 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-lock_the_lock-4 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-lock_the_smart_lock-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-lock_the_smart_lock-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-lock_the_smart_lock-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-lock_the_smart_lock-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-lock_the_smart_lock-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-unlock_all_the_doors-0 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-unlock_all_the_doors-1 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-unlock_all_the_doors-2 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-unlock_all_the_doors-3 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-unlock_all_the_doors-4 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: home1_us_smart_lock-unlock_the_entry_lock-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-unlock_the_entry_lock-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-unlock_the_entry_lock-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-unlock_the_entry_lock-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-unlock_the_entry_lock-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-unlock_the_smart_lock-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-unlock_the_smart_lock-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-unlock_the_smart_lock-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-unlock_the_smart_lock-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_smart_lock-unlock_the_smart_lock-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-clean_the_living_room-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-clean_the_living_room-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-clean_the_living_room-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-clean_the_living_room-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-clean_the_living_room-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-please_start_cleaning-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-please_start_cleaning-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-please_start_cleaning-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-please_start_cleaning-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-please_start_cleaning-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-return_roborock_downstairs_to_base-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-return_roborock_downstairs_to_base-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-return_roborock_downstairs_to_base-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-return_roborock_downstairs_to_base-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-return_roborock_downstairs_to_base-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-roborock_downstairs_return_to_base-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-roborock_downstairs_return_to_base-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-roborock_downstairs_return_to_base-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-roborock_downstairs_return_to_base-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-roborock_downstairs_return_to_base-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-roborock_return_to_base-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-roborock_return_to_base-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-roborock_return_to_base-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-roborock_return_to_base-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-roborock_return_to_base-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_roborock-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_roborock-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_roborock-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_roborock-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_roborock-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_roborock_downstairs-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_roborock_downstairs-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_roborock_downstairs-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_roborock_downstairs-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_roborock_downstairs-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_roborock_downstairs_vacuum-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_roborock_downstairs_vacuum-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_roborock_downstairs_vacuum-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_roborock_downstairs_vacuum-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_roborock_downstairs_vacuum-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_the_vacuum-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_the_vacuum-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_the_vacuum-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_the_vacuum-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_the_vacuum-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_vacuuming-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_vacuuming-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_vacuuming-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_vacuuming-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-start_vacuuming-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_roborock_downstairs-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_roborock_downstairs-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_roborock_downstairs-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_roborock_downstairs-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_roborock_downstairs-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_the_downstairs_vacuum-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_the_downstairs_vacuum-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_the_downstairs_vacuum-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_the_downstairs_vacuum-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_the_downstairs_vacuum-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_the_vacuum-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_the_vacuum-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_the_vacuum-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_the_vacuum-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_the_vacuum-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_vacuuming-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_vacuuming-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_vacuuming-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_vacuuming-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-stop_vacuuming-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-turn_on_the_vacuum-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-turn_on_the_vacuum-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-turn_on_the_vacuum-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-turn_on_the_vacuum-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-turn_on_the_vacuum-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-vacuum_downstairs-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-vacuum_downstairs-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-vacuum_downstairs-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-vacuum_downstairs-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-vacuum_downstairs-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-vacuum_return_to_base-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-vacuum_return_to_base-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-vacuum_return_to_base-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-vacuum_return_to_base-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home1_us_vacuum-vacuum_return_to_base-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-close_the_front_yard_valve-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-close_the_front_yard_valve-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-close_the_front_yard_valve-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-close_the_front_yard_valve-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-close_the_front_yard_valve-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-close_the_irrigation_valve-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-close_the_irrigation_valve-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-close_the_irrigation_valve-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-close_the_irrigation_valve-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-close_the_irrigation_valve-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-open_the_front_yard_valve-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-open_the_front_yard_valve-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-open_the_front_yard_valve-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-open_the_front_yard_valve-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-open_the_front_yard_valve-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-open_the_irrigation_valve-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-open_the_irrigation_valve-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-open_the_irrigation_valve-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-open_the_irrigation_valve-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-open_the_irrigation_valve-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-set_the_irrigation_valve_to_50-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-set_the_irrigation_valve_to_50-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-set_the_irrigation_valve_to_50-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-set_the_irrigation_valve_to_50-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-set_the_irrigation_valve_to_50-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-turn_off_the_water_in_the_front_yard-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-turn_off_the_water_in_the_front_yard-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-turn_off_the_water_in_the_front_yard-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-turn_off_the_water_in_the_front_yard-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-turn_off_the_water_in_the_front_yard-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-turn_on_the_water_in_the_front_yard-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-turn_on_the_water_in_the_front_yard-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-turn_on_the_water_in_the_front_yard-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-turn_on_the_water_in_the_front_yard-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-turn_on_the_water_in_the_front_yard-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-water_the_front_yard-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-water_the_front_yard-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-water_the_front_yard-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-water_the_front_yard-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-water_the_front_yard-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-water_the_garden-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-water_the_garden-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-water_the_garden-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-water_the_garden-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home2_ru_water_valve-water_the_garden-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_off_the_bedroom_fan-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_off_the_bedroom_fan-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_off_the_bedroom_fan-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_off_the_bedroom_fan-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_off_the_bedroom_fan-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_off_the_fan-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_off_the_fan-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_off_the_fan-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_off_the_fan-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_off_the_fan-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_bedroom_1_fan-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_bedroom_1_fan-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_bedroom_1_fan-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_bedroom_1_fan-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_bedroom_1_fan-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_bedroom_fan-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_bedroom_fan-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_bedroom_fan-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_bedroom_fan-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_bedroom_fan-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_fan-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_fan-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_fan-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_fan-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_fan-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_fan_in_the_bedroom-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_fan_in_the_bedroom-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_fan_in_the_bedroom-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_fan_in_the_bedroom-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home5_cn_fan-turn_on_the_fan_in_the_bedroom-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-close_the_curtains-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-close_the_curtains-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-close_the_curtains-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-close_the_curtains-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-close_the_curtains-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-close_the_living_room_curtain-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-close_the_living_room_curtain-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-close_the_living_room_curtain-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-close_the_living_room_curtain-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-close_the_living_room_curtain-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-close_the_smart_curtains-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-close_the_smart_curtains-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-close_the_smart_curtains-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-close_the_smart_curtains-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-close_the_smart_curtains-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-open_the_living_room_curtains-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-open_the_living_room_curtains-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-open_the_living_room_curtains-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-open_the_living_room_curtains-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-open_the_living_room_curtains-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-open_the_living_room_smart_curtain-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-open_the_living_room_smart_curtain-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-open_the_living_room_smart_curtain-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-open_the_living_room_smart_curtain-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-open_the_living_room_smart_curtain-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-open_the_smart_curtain-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-open_the_smart_curtain-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-open_the_smart_curtain-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-open_the_smart_curtain-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-open_the_smart_curtain-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-set_the_living_room_curtains_to_50-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-set_the_living_room_curtains_to_50-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-set_the_living_room_curtains_to_50-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-set_the_living_room_curtains_to_50-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-set_the_living_room_curtains_to_50-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-set_the_smart_curtain_to_50-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-set_the_smart_curtain_to_50-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-set_the_smart_curtain_to_50-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-set_the_smart_curtain_to_50-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_cover_curtain-set_the_smart_curtain_to_50-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-mute_the_music-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-mute_the_music-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-mute_the_music-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-mute_the_music-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-mute_the_music-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-mute_the_outdoor_speakers-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-mute_the_outdoor_speakers-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-mute_the_outdoor_speakers-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-mute_the_outdoor_speakers-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-mute_the_outdoor_speakers-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-next_song-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-next_song-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-next_song-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-next_song-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-next_song-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_outdoor_speakers-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_outdoor_speakers-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_outdoor_speakers-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_outdoor_speakers-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_outdoor_speakers-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_the_music-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_the_music-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_the_music-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_the_music-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_the_music-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_the_music_outside-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_the_music_outside-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_the_music_outside-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_the_music_outside-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_the_music_outside-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_the_rooftop_terrace_music-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_the_rooftop_terrace_music-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_the_rooftop_terrace_music-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_the_rooftop_terrace_music-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-pause_the_rooftop_terrace_music-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-resume_outdoor_speakers-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-resume_outdoor_speakers-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-resume_outdoor_speakers-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-resume_outdoor_speakers-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-resume_outdoor_speakers-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-resume_playing_the_music-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-resume_playing_the_music-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-resume_playing_the_music-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-resume_playing_the_music-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-resume_playing_the_music-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-set_outdoor_speakers_volume_to_50-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-set_outdoor_speakers_volume_to_50-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-set_outdoor_speakers_volume_to_50-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-set_outdoor_speakers_volume_to_50-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-set_outdoor_speakers_volume_to_50-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-skip_song_on_outdoor_speakers-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-skip_song_on_outdoor_speakers-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-skip_song_on_outdoor_speakers-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-skip_song_on_outdoor_speakers-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-skip_song_on_outdoor_speakers-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-skip_to_the_next_track-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-skip_to_the_next_track-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-skip_to_the_next_track-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-skip_to_the_next_track-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-skip_to_the_next_track-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-skip_to_the_next_track_on_the_outdoor_speakers-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-turn_off_the_music-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-turn_off_the_music-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-turn_off_the_music-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-turn_off_the_music-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-turn_off_the_music-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-turn_off_the_outdoor_speakers-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-turn_off_the_outdoor_speakers-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-turn_off_the_outdoor_speakers-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-turn_off_the_outdoor_speakers-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-turn_off_the_outdoor_speakers-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-turn_the_volume_down_to_50-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-turn_the_volume_down_to_50-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-turn_the_volume_down_to_50-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-turn_the_volume_down_to_50-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-turn_the_volume_down_to_50-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-unpause_the_outdoor_speakers-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-unpause_the_outdoor_speakers-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-unpause_the_outdoor_speakers-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-unpause_the_outdoor_speakers-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-unpause_the_outdoor_speakers-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-unpause_the_rooftop_terrace_music-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-unpause_the_rooftop_terrace_music-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-unpause_the_rooftop_terrace_music-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-unpause_the_rooftop_terrace_music-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: home7_dk_media_player-unpause_the_rooftop_terrace_music-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 + diff --git a/reports/assist/2026.2.2/reports-by-test-name.yaml b/reports/assist/2026.2.2/reports-by-test-name.yaml new file mode 100644 index 000000000..3e83b2038 --- /dev/null +++ b/reports/assist/2026.2.2/reports-by-test-name.yaml @@ -0,0 +1,22 @@ +--- +- task_name: eval-test_expect_llm_tool_call_args + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- task_name: eval-test_expect_llm_tool_call_name + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- task_name: eval-test_expect_response + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- task_name: eval-test_expected_states + good_percent: 8.7% + confidence_interval: 2.6% + good: 40 + total: 460 + diff --git a/reports/assist/2026.2.2/reports-token-stats.yaml b/reports/assist/2026.2.2/reports-token-stats.yaml new file mode 100644 index 000000000..63b69c89f --- /dev/null +++ b/reports/assist/2026.2.2/reports-token-stats.yaml @@ -0,0 +1,2 @@ +--- [] + diff --git a/reports/assist/2026.2.2/reports.yaml b/reports/assist/2026.2.2/reports.yaml new file mode 100644 index 000000000..58b196c51 --- /dev/null +++ b/reports/assist/2026.2.2/reports.yaml @@ -0,0 +1,7 @@ +--- +- model_id: gemma-3-27b-it + good_percent: 8.7% + confidence_interval: 2.6% + good: 40 + total: 460 + diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/_scrape_context.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/_scrape_context.yaml new file mode 100644 index 000000000..9fe85ad9f --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/_scrape_context.yaml @@ -0,0 +1,19 @@ +--- +uuid: d44ef9d0-9e07-4890-9b9f-3aa162333806 +timestamp: 2026-02-16 08:07:47.161669 +scrape_config: + dataset: automations + dataset_path: datasets/automations + dataset_version: v0 + model_id: gemma-3-27b-it + model_output_path: reports/automations/2026.2.2 +version: 2026.2.2 +context: + user: runner + argv: + - /home/runner/work/openrouter-benchmarks/openrouter-benchmarks/.venv/bin/pytest + - home_assistant_datasets/tool/automation/collect + - --models=gemma-3-27b-it + - --dataset=datasets/automations/ + - --model_output_dir=reports/automations/2026.2.2 +notes: '' diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/door_left_open_door_left_open-door_left_open-0.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/door_left_open_door_left_open-door_left_open-0.yaml new file mode 100644 index 000000000..0c3208443 --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/door_left_open_door_left_open-door_left_open-0.yaml @@ -0,0 +1,377 @@ +--- +uuid: 61520a82-7f17-4a8a-ac19-d0f6f05a5f61 +task_id: door_left_open_door_left_open-door_left_open-0 +model_id: gemma-3-27b-it +category: media_player +task: + input_text: | + # Door left open + + ## Problem statement + + Create an blueprint automation to play an alert on the speaker when the door is + left open. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Play an message on the living room speaker if the garage door has been left open for more than 30 minutes + - When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker. + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Selector Type | Description | + | --------------- | ------------- | ------------------------------------------------------------------------------------- | + | `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. | + | `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. | + | `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. | + + The automation should play the selected media when the door has been open for the specified duration. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Door left open + + ## Problem statement + + Create an blueprint automation to play an alert on the speaker when the door is + left open. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Play an message on the living room speaker if the garage door has been left open for more than 30 minutes + - When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker. + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Selector Type | Description | + | --------------- | ------------- | ------------------------------------------------------------------------------------- | + | `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. | + | `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. | + | `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. | + + The automation should play the selected media when the door has been open for the specified duration. + context: + id: 01KHJQVJYEEFY19RRV8V8FP99Z + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:48.942412+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Air Quality Sensor Generic + domain: sensor + areas: Yoga Studio + - names: Bedroom Bedroom Light + domain: light + areas: Bedroom + - names: Bedroom Light + domain: light + areas: Master Bedroom + - names: Bedroom Smart Speaker + domain: media_player + areas: Bedroom + - names: Citrus Grove Light + domain: light + areas: Citrus Grove + - names: Dining Room Light + domain: light + areas: Dining room + - names: Dishwasher + domain: switch + areas: Kitchen + - names: Dishwasher Energy + domain: sensor + areas: Kitchen + - names: Entry Door + domain: binary_sensor + areas: Entry + - names: Entry Door Battery + domain: binary_sensor + areas: Entry + - names: Entry Door Battery + domain: sensor + areas: Entry + - names: Guest Bedroom Bedroom Light + domain: light + areas: Guest Bedroom + - names: Guest Bedroom Smart Speaker + domain: media_player + areas: Guest Bedroom + - names: Guest House Light + domain: light + areas: Guest House + - names: Guest House Smart Speaker + domain: media_player + areas: Guest House + - names: Guest House Thermostat + domain: climate + areas: Guest House + - names: Guest House Thermostat Humidity + domain: sensor + areas: Guest House + - names: Guest House Thermostat Temperature + domain: sensor + areas: Guest House + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living room + - names: Master Bedroom Smart Speaker + domain: media_player + areas: Master Bedroom + - names: Master Bedroom Tv + domain: media_player + areas: Master Bedroom + - names: Oven + domain: switch + areas: Kitchen + - names: Oven Energy + domain: sensor + areas: Kitchen + - names: Pool Heater + domain: switch + areas: Pool + - names: Pool Heater Energy + domain: sensor + areas: Pool + - names: Pool Light + domain: light + areas: Pool + - names: Pool Pump + domain: switch + areas: Pool + - names: Pool Pump Energy + domain: sensor + areas: Pool + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living room + - names: Sprinkler + domain: valve + areas: Citrus Grove + - names: Terrace Light + domain: light + areas: Terrace + - names: Terrace Smart Speaker + domain: media_player + areas: Terrace + - names: Thermostat + domain: climate + areas: Master Bedroom + - names: Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Tv + domain: media_player + areas: Living room + - names: Yoga Studio Light + domain: light + areas: Yoga Studio + created: 2026-02-16 08:07:48.944445+00:00 + - role: user + content: | + # Door left open + + ## Problem statement + + Create an blueprint automation to play an alert on the speaker when the door is + left open. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Play an message on the living room speaker if the garage door has been left open for more than 30 minutes + - When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker. + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Selector Type | Description | + | --------------- | ------------- | ------------------------------------------------------------------------------------- | + | `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. | + | `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. | + | `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. | + + The automation should play the selected media when the door has been open for the specified duration. + attachments: null + created: 2026-02-16 08:07:48.942497+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f94481cc880>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f9428bcbd70>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:48.944462+00:00 + duration_ms: 180.545 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/door_left_open_door_left_open-door_left_open-1.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/door_left_open_door_left_open-door_left_open-1.yaml new file mode 100644 index 000000000..8eb49dcd8 --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/door_left_open_door_left_open-door_left_open-1.yaml @@ -0,0 +1,377 @@ +--- +uuid: 40893d10-b060-4cc5-b9b6-b7fd9e76c2a6 +task_id: door_left_open_door_left_open-door_left_open-1 +model_id: gemma-3-27b-it +category: media_player +task: + input_text: | + # Door left open + + ## Problem statement + + Create an blueprint automation to play an alert on the speaker when the door is + left open. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Play an message on the living room speaker if the garage door has been left open for more than 30 minutes + - When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker. + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Selector Type | Description | + | --------------- | ------------- | ------------------------------------------------------------------------------------- | + | `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. | + | `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. | + | `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. | + + The automation should play the selected media when the door has been open for the specified duration. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Door left open + + ## Problem statement + + Create an blueprint automation to play an alert on the speaker when the door is + left open. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Play an message on the living room speaker if the garage door has been left open for more than 30 minutes + - When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker. + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Selector Type | Description | + | --------------- | ------------- | ------------------------------------------------------------------------------------- | + | `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. | + | `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. | + | `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. | + + The automation should play the selected media when the door has been open for the specified duration. + context: + id: 01KHJQVKBQE0TZYDDVWGTBQ0MF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:49.367180+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Air Quality Sensor Generic + domain: sensor + areas: Yoga Studio + - names: Bedroom Bedroom Light + domain: light + areas: Bedroom + - names: Bedroom Light + domain: light + areas: Master Bedroom + - names: Bedroom Smart Speaker + domain: media_player + areas: Bedroom + - names: Citrus Grove Light + domain: light + areas: Citrus Grove + - names: Dining Room Light + domain: light + areas: Dining room + - names: Dishwasher + domain: switch + areas: Kitchen + - names: Dishwasher Energy + domain: sensor + areas: Kitchen + - names: Entry Door + domain: binary_sensor + areas: Entry + - names: Entry Door Battery + domain: binary_sensor + areas: Entry + - names: Entry Door Battery + domain: sensor + areas: Entry + - names: Guest Bedroom Bedroom Light + domain: light + areas: Guest Bedroom + - names: Guest Bedroom Smart Speaker + domain: media_player + areas: Guest Bedroom + - names: Guest House Light + domain: light + areas: Guest House + - names: Guest House Smart Speaker + domain: media_player + areas: Guest House + - names: Guest House Thermostat + domain: climate + areas: Guest House + - names: Guest House Thermostat Humidity + domain: sensor + areas: Guest House + - names: Guest House Thermostat Temperature + domain: sensor + areas: Guest House + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living room + - names: Master Bedroom Smart Speaker + domain: media_player + areas: Master Bedroom + - names: Master Bedroom Tv + domain: media_player + areas: Master Bedroom + - names: Oven + domain: switch + areas: Kitchen + - names: Oven Energy + domain: sensor + areas: Kitchen + - names: Pool Heater + domain: switch + areas: Pool + - names: Pool Heater Energy + domain: sensor + areas: Pool + - names: Pool Light + domain: light + areas: Pool + - names: Pool Pump + domain: switch + areas: Pool + - names: Pool Pump Energy + domain: sensor + areas: Pool + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living room + - names: Sprinkler + domain: valve + areas: Citrus Grove + - names: Terrace Light + domain: light + areas: Terrace + - names: Terrace Smart Speaker + domain: media_player + areas: Terrace + - names: Thermostat + domain: climate + areas: Master Bedroom + - names: Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Tv + domain: media_player + areas: Living room + - names: Yoga Studio Light + domain: light + areas: Yoga Studio + created: 2026-02-16 08:07:49.369222+00:00 + - role: user + content: | + # Door left open + + ## Problem statement + + Create an blueprint automation to play an alert on the speaker when the door is + left open. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Play an message on the living room speaker if the garage door has been left open for more than 30 minutes + - When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker. + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Selector Type | Description | + | --------------- | ------------- | ------------------------------------------------------------------------------------- | + | `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. | + | `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. | + | `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. | + + The automation should play the selected media when the door has been open for the specified duration. + attachments: null + created: 2026-02-16 08:07:49.367268+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f94492ca140>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f9428bcbd70>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:49.369237+00:00 + duration_ms: 236.694 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/door_left_open_door_left_open-door_left_open-2.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/door_left_open_door_left_open-door_left_open-2.yaml new file mode 100644 index 000000000..ddf1c2f79 --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/door_left_open_door_left_open-door_left_open-2.yaml @@ -0,0 +1,377 @@ +--- +uuid: b8c4c997-91fb-4fb5-bb89-f6c217a5c5d8 +task_id: door_left_open_door_left_open-door_left_open-2 +model_id: gemma-3-27b-it +category: media_player +task: + input_text: | + # Door left open + + ## Problem statement + + Create an blueprint automation to play an alert on the speaker when the door is + left open. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Play an message on the living room speaker if the garage door has been left open for more than 30 minutes + - When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker. + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Selector Type | Description | + | --------------- | ------------- | ------------------------------------------------------------------------------------- | + | `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. | + | `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. | + | `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. | + + The automation should play the selected media when the door has been open for the specified duration. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Door left open + + ## Problem statement + + Create an blueprint automation to play an alert on the speaker when the door is + left open. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Play an message on the living room speaker if the garage door has been left open for more than 30 minutes + - When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker. + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Selector Type | Description | + | --------------- | ------------- | ------------------------------------------------------------------------------------- | + | `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. | + | `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. | + | `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. | + + The automation should play the selected media when the door has been open for the specified duration. + context: + id: 01KHJQVKTC6B1V938EYXBSDETN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:49.836332+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Air Quality Sensor Generic + domain: sensor + areas: Yoga Studio + - names: Bedroom Bedroom Light + domain: light + areas: Bedroom + - names: Bedroom Light + domain: light + areas: Master Bedroom + - names: Bedroom Smart Speaker + domain: media_player + areas: Bedroom + - names: Citrus Grove Light + domain: light + areas: Citrus Grove + - names: Dining Room Light + domain: light + areas: Dining room + - names: Dishwasher + domain: switch + areas: Kitchen + - names: Dishwasher Energy + domain: sensor + areas: Kitchen + - names: Entry Door + domain: binary_sensor + areas: Entry + - names: Entry Door Battery + domain: sensor + areas: Entry + - names: Entry Door Battery + domain: binary_sensor + areas: Entry + - names: Guest Bedroom Bedroom Light + domain: light + areas: Guest Bedroom + - names: Guest Bedroom Smart Speaker + domain: media_player + areas: Guest Bedroom + - names: Guest House Light + domain: light + areas: Guest House + - names: Guest House Smart Speaker + domain: media_player + areas: Guest House + - names: Guest House Thermostat + domain: climate + areas: Guest House + - names: Guest House Thermostat Humidity + domain: sensor + areas: Guest House + - names: Guest House Thermostat Temperature + domain: sensor + areas: Guest House + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living room + - names: Master Bedroom Smart Speaker + domain: media_player + areas: Master Bedroom + - names: Master Bedroom Tv + domain: media_player + areas: Master Bedroom + - names: Oven + domain: switch + areas: Kitchen + - names: Oven Energy + domain: sensor + areas: Kitchen + - names: Pool Heater + domain: switch + areas: Pool + - names: Pool Heater Energy + domain: sensor + areas: Pool + - names: Pool Light + domain: light + areas: Pool + - names: Pool Pump + domain: switch + areas: Pool + - names: Pool Pump Energy + domain: sensor + areas: Pool + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living room + - names: Sprinkler + domain: valve + areas: Citrus Grove + - names: Terrace Light + domain: light + areas: Terrace + - names: Terrace Smart Speaker + domain: media_player + areas: Terrace + - names: Thermostat + domain: climate + areas: Master Bedroom + - names: Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Tv + domain: media_player + areas: Living room + - names: Yoga Studio Light + domain: light + areas: Yoga Studio + created: 2026-02-16 08:07:49.838324+00:00 + - role: user + content: | + # Door left open + + ## Problem statement + + Create an blueprint automation to play an alert on the speaker when the door is + left open. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Play an message on the living room speaker if the garage door has been left open for more than 30 minutes + - When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker. + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Selector Type | Description | + | --------------- | ------------- | ------------------------------------------------------------------------------------- | + | `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. | + | `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. | + | `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. | + + The automation should play the selected media when the door has been open for the specified duration. + attachments: null + created: 2026-02-16 08:07:49.836400+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f944906f110>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f9428bcbd70>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:49.838336+00:00 + duration_ms: 193.512 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/door_left_open_door_left_open-door_left_open-3.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/door_left_open_door_left_open-door_left_open-3.yaml new file mode 100644 index 000000000..e4ff20b5e --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/door_left_open_door_left_open-door_left_open-3.yaml @@ -0,0 +1,377 @@ +--- +uuid: 320bbbe5-6487-436f-b6ae-474cd109c498 +task_id: door_left_open_door_left_open-door_left_open-3 +model_id: gemma-3-27b-it +category: media_player +task: + input_text: | + # Door left open + + ## Problem statement + + Create an blueprint automation to play an alert on the speaker when the door is + left open. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Play an message on the living room speaker if the garage door has been left open for more than 30 minutes + - When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker. + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Selector Type | Description | + | --------------- | ------------- | ------------------------------------------------------------------------------------- | + | `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. | + | `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. | + | `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. | + + The automation should play the selected media when the door has been open for the specified duration. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Door left open + + ## Problem statement + + Create an blueprint automation to play an alert on the speaker when the door is + left open. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Play an message on the living room speaker if the garage door has been left open for more than 30 minutes + - When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker. + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Selector Type | Description | + | --------------- | ------------- | ------------------------------------------------------------------------------------- | + | `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. | + | `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. | + | `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. | + + The automation should play the selected media when the door has been open for the specified duration. + context: + id: 01KHJQVM87BSVC7DS9R47X2QCP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:50.279490+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Air Quality Sensor Generic + domain: sensor + areas: Yoga Studio + - names: Bedroom Bedroom Light + domain: light + areas: Bedroom + - names: Bedroom Light + domain: light + areas: Master Bedroom + - names: Bedroom Smart Speaker + domain: media_player + areas: Bedroom + - names: Citrus Grove Light + domain: light + areas: Citrus Grove + - names: Dining Room Light + domain: light + areas: Dining room + - names: Dishwasher + domain: switch + areas: Kitchen + - names: Dishwasher Energy + domain: sensor + areas: Kitchen + - names: Entry Door + domain: binary_sensor + areas: Entry + - names: Entry Door Battery + domain: binary_sensor + areas: Entry + - names: Entry Door Battery + domain: sensor + areas: Entry + - names: Guest Bedroom Bedroom Light + domain: light + areas: Guest Bedroom + - names: Guest Bedroom Smart Speaker + domain: media_player + areas: Guest Bedroom + - names: Guest House Light + domain: light + areas: Guest House + - names: Guest House Smart Speaker + domain: media_player + areas: Guest House + - names: Guest House Thermostat + domain: climate + areas: Guest House + - names: Guest House Thermostat Humidity + domain: sensor + areas: Guest House + - names: Guest House Thermostat Temperature + domain: sensor + areas: Guest House + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living room + - names: Master Bedroom Smart Speaker + domain: media_player + areas: Master Bedroom + - names: Master Bedroom Tv + domain: media_player + areas: Master Bedroom + - names: Oven + domain: switch + areas: Kitchen + - names: Oven Energy + domain: sensor + areas: Kitchen + - names: Pool Heater + domain: switch + areas: Pool + - names: Pool Heater Energy + domain: sensor + areas: Pool + - names: Pool Light + domain: light + areas: Pool + - names: Pool Pump + domain: switch + areas: Pool + - names: Pool Pump Energy + domain: sensor + areas: Pool + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living room + - names: Sprinkler + domain: valve + areas: Citrus Grove + - names: Terrace Light + domain: light + areas: Terrace + - names: Terrace Smart Speaker + domain: media_player + areas: Terrace + - names: Thermostat + domain: climate + areas: Master Bedroom + - names: Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Tv + domain: media_player + areas: Living room + - names: Yoga Studio Light + domain: light + areas: Yoga Studio + created: 2026-02-16 08:07:50.281505+00:00 + - role: user + content: | + # Door left open + + ## Problem statement + + Create an blueprint automation to play an alert on the speaker when the door is + left open. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Play an message on the living room speaker if the garage door has been left open for more than 30 minutes + - When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker. + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Selector Type | Description | + | --------------- | ------------- | ------------------------------------------------------------------------------------- | + | `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. | + | `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. | + | `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. | + + The automation should play the selected media when the door has been open for the specified duration. + attachments: null + created: 2026-02-16 08:07:50.279565+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f9448e01b10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f9428bcbd70>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:50.281517+00:00 + duration_ms: 173.043 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/door_left_open_door_left_open-door_left_open-4.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/door_left_open_door_left_open-door_left_open-4.yaml new file mode 100644 index 000000000..ba9ab2922 --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/door_left_open_door_left_open-door_left_open-4.yaml @@ -0,0 +1,377 @@ +--- +uuid: c7650244-f1ab-427f-8a6d-819b604a9067 +task_id: door_left_open_door_left_open-door_left_open-4 +model_id: gemma-3-27b-it +category: media_player +task: + input_text: | + # Door left open + + ## Problem statement + + Create an blueprint automation to play an alert on the speaker when the door is + left open. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Play an message on the living room speaker if the garage door has been left open for more than 30 minutes + - When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker. + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Selector Type | Description | + | --------------- | ------------- | ------------------------------------------------------------------------------------- | + | `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. | + | `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. | + | `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. | + + The automation should play the selected media when the door has been open for the specified duration. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Door left open + + ## Problem statement + + Create an blueprint automation to play an alert on the speaker when the door is + left open. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Play an message on the living room speaker if the garage door has been left open for more than 30 minutes + - When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker. + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Selector Type | Description | + | --------------- | ------------- | ------------------------------------------------------------------------------------- | + | `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. | + | `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. | + | `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. | + + The automation should play the selected media when the door has been open for the specified duration. + context: + id: 01KHJQVMN1JMH5EGGYK6D576CH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:50.689824+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Air Quality Sensor Generic + domain: sensor + areas: Yoga Studio + - names: Bedroom Bedroom Light + domain: light + areas: Bedroom + - names: Bedroom Light + domain: light + areas: Master Bedroom + - names: Bedroom Smart Speaker + domain: media_player + areas: Bedroom + - names: Citrus Grove Light + domain: light + areas: Citrus Grove + - names: Dining Room Light + domain: light + areas: Dining room + - names: Dishwasher + domain: switch + areas: Kitchen + - names: Dishwasher Energy + domain: sensor + areas: Kitchen + - names: Entry Door + domain: binary_sensor + areas: Entry + - names: Entry Door Battery + domain: binary_sensor + areas: Entry + - names: Entry Door Battery + domain: sensor + areas: Entry + - names: Guest Bedroom Bedroom Light + domain: light + areas: Guest Bedroom + - names: Guest Bedroom Smart Speaker + domain: media_player + areas: Guest Bedroom + - names: Guest House Light + domain: light + areas: Guest House + - names: Guest House Smart Speaker + domain: media_player + areas: Guest House + - names: Guest House Thermostat + domain: climate + areas: Guest House + - names: Guest House Thermostat Humidity + domain: sensor + areas: Guest House + - names: Guest House Thermostat Temperature + domain: sensor + areas: Guest House + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living room + - names: Master Bedroom Smart Speaker + domain: media_player + areas: Master Bedroom + - names: Master Bedroom Tv + domain: media_player + areas: Master Bedroom + - names: Oven + domain: switch + areas: Kitchen + - names: Oven Energy + domain: sensor + areas: Kitchen + - names: Pool Heater + domain: switch + areas: Pool + - names: Pool Heater Energy + domain: sensor + areas: Pool + - names: Pool Light + domain: light + areas: Pool + - names: Pool Pump + domain: switch + areas: Pool + - names: Pool Pump Energy + domain: sensor + areas: Pool + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living room + - names: Sprinkler + domain: valve + areas: Citrus Grove + - names: Terrace Light + domain: light + areas: Terrace + - names: Terrace Smart Speaker + domain: media_player + areas: Terrace + - names: Thermostat + domain: climate + areas: Master Bedroom + - names: Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Tv + domain: media_player + areas: Living room + - names: Yoga Studio Light + domain: light + areas: Yoga Studio + created: 2026-02-16 08:07:50.692875+00:00 + - role: user + content: | + # Door left open + + ## Problem statement + + Create an blueprint automation to play an alert on the speaker when the door is + left open. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Play an message on the living room speaker if the garage door has been left open for more than 30 minutes + - When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker. + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Selector Type | Description | + | --------------- | ------------- | ------------------------------------------------------------------------------------- | + | `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. | + | `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. | + | `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. | + + The automation should play the selected media when the door has been open for the specified duration. + attachments: null + created: 2026-02-16 08:07:50.689895+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f94492def00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f9428bcbd70>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:50.692889+00:00 + duration_ms: 167.531 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/humidity_fan_humidity_fan-humidity_fan-0.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/humidity_fan_humidity_fan-humidity_fan-0.yaml new file mode 100644 index 000000000..980ca2935 --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/humidity_fan_humidity_fan-humidity_fan-0.yaml @@ -0,0 +1,286 @@ +--- +uuid: 4148242b-2f42-4d3a-a311-36acb4193304 +task_id: humidity_fan_humidity_fan-humidity_fan-0 +model_id: gemma-3-27b-it +category: fan +task: + input_text: | + # Humidity Fan + + ## Problem statement + + Create a blueprint automation to turn on the fan based on the value of the humidity sensor. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the bathroom exhaust fan when the bathroom humidty is above 60% + - Make sure the garage does not get too humid + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. | + | `humidity_level` | A `number` selector to set the value used as the trigger. | + | `fan_entity` | A `fan` entity to turn on when the automation fires. | + + The automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When + triggered, the fan should be turned off. The fan should also be stopped when the + sensor goes back down below the desired level. The automation should not try to + turn the fan on if it is already on, and should not turn the fan off when it is + already off. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Humidity Fan + + ## Problem statement + + Create a blueprint automation to turn on the fan based on the value of the humidity sensor. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the bathroom exhaust fan when the bathroom humidty is above 60% + - Make sure the garage does not get too humid + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. | + | `humidity_level` | A `number` selector to set the value used as the trigger. | + | `fan_entity` | A `fan` entity to turn on when the automation fires. | + + The automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When + triggered, the fan should be turned off. The fan should also be stopped when the + sensor goes back down below the desired level. The automation should not try to + turn the fan on if it is already on, and should not turn the fan off when it is + already off. + context: + id: 01KHJQVN0DS6J4B966M7GXMA27 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:51.053883+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bathroom Exhaust Fan + domain: fan + areas: Bathroom + - names: Bathroom Exhaust Fan Humidity + domain: sensor + areas: Bathroom + - names: Bathroom Light + domain: light + areas: Bathroom + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Smart Plug + domain: switch + areas: Kitchen + - names: Smart Plug Energy + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Storage Room Light + domain: light + areas: Storage Room + created: 2026-02-16 08:07:51.055449+00:00 + - role: user + content: | + # Humidity Fan + + ## Problem statement + + Create a blueprint automation to turn on the fan based on the value of the humidity sensor. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the bathroom exhaust fan when the bathroom humidty is above 60% + - Make sure the garage does not get too humid + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. | + | `humidity_level` | A `number` selector to set the value used as the trigger. | + | `fan_entity` | A `fan` entity to turn on when the automation fires. | + + The automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When + triggered, the fan should be turned off. The fan should also be stopped when the + sensor goes back down below the desired level. The automation should not try to + turn the fan on if it is already on, and should not turn the fan off when it is + already off. + attachments: null + created: 2026-02-16 08:07:51.053951+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f9448c2b530>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f9428bcbd70>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:51.055462+00:00 + duration_ms: 165.107 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/humidity_fan_humidity_fan-humidity_fan-1.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/humidity_fan_humidity_fan-humidity_fan-1.yaml new file mode 100644 index 000000000..18104394d --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/humidity_fan_humidity_fan-humidity_fan-1.yaml @@ -0,0 +1,286 @@ +--- +uuid: d329239a-2862-4e12-a4d3-a5e0138d09f5 +task_id: humidity_fan_humidity_fan-humidity_fan-1 +model_id: gemma-3-27b-it +category: fan +task: + input_text: | + # Humidity Fan + + ## Problem statement + + Create a blueprint automation to turn on the fan based on the value of the humidity sensor. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the bathroom exhaust fan when the bathroom humidty is above 60% + - Make sure the garage does not get too humid + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. | + | `humidity_level` | A `number` selector to set the value used as the trigger. | + | `fan_entity` | A `fan` entity to turn on when the automation fires. | + + The automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When + triggered, the fan should be turned off. The fan should also be stopped when the + sensor goes back down below the desired level. The automation should not try to + turn the fan on if it is already on, and should not turn the fan off when it is + already off. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Humidity Fan + + ## Problem statement + + Create a blueprint automation to turn on the fan based on the value of the humidity sensor. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the bathroom exhaust fan when the bathroom humidty is above 60% + - Make sure the garage does not get too humid + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. | + | `humidity_level` | A `number` selector to set the value used as the trigger. | + | `fan_entity` | A `fan` entity to turn on when the automation fires. | + + The automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When + triggered, the fan should be turned off. The fan should also be stopped when the + sensor goes back down below the desired level. The automation should not try to + turn the fan on if it is already on, and should not turn the fan off when it is + already off. + context: + id: 01KHJQVNCABXPRMWMQNV5WAV7Y + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:51.434667+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bathroom Exhaust Fan + domain: fan + areas: Bathroom + - names: Bathroom Exhaust Fan Humidity + domain: sensor + areas: Bathroom + - names: Bathroom Light + domain: light + areas: Bathroom + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Smart Plug + domain: switch + areas: Kitchen + - names: Smart Plug Energy + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Storage Room Light + domain: light + areas: Storage Room + created: 2026-02-16 08:07:51.437170+00:00 + - role: user + content: | + # Humidity Fan + + ## Problem statement + + Create a blueprint automation to turn on the fan based on the value of the humidity sensor. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the bathroom exhaust fan when the bathroom humidty is above 60% + - Make sure the garage does not get too humid + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. | + | `humidity_level` | A `number` selector to set the value used as the trigger. | + | `fan_entity` | A `fan` entity to turn on when the automation fires. | + + The automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When + triggered, the fan should be turned off. The fan should also be stopped when the + sensor goes back down below the desired level. The automation should not try to + turn the fan on if it is already on, and should not turn the fan off when it is + already off. + attachments: null + created: 2026-02-16 08:07:51.434762+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f9448a4d9b0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f9428bcbd70>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:51.437184+00:00 + duration_ms: 181.641 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/humidity_fan_humidity_fan-humidity_fan-2.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/humidity_fan_humidity_fan-humidity_fan-2.yaml new file mode 100644 index 000000000..4cef1506a --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/humidity_fan_humidity_fan-humidity_fan-2.yaml @@ -0,0 +1,286 @@ +--- +uuid: df193723-b322-40a9-b225-2ad6599b8a56 +task_id: humidity_fan_humidity_fan-humidity_fan-2 +model_id: gemma-3-27b-it +category: fan +task: + input_text: | + # Humidity Fan + + ## Problem statement + + Create a blueprint automation to turn on the fan based on the value of the humidity sensor. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the bathroom exhaust fan when the bathroom humidty is above 60% + - Make sure the garage does not get too humid + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. | + | `humidity_level` | A `number` selector to set the value used as the trigger. | + | `fan_entity` | A `fan` entity to turn on when the automation fires. | + + The automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When + triggered, the fan should be turned off. The fan should also be stopped when the + sensor goes back down below the desired level. The automation should not try to + turn the fan on if it is already on, and should not turn the fan off when it is + already off. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Humidity Fan + + ## Problem statement + + Create a blueprint automation to turn on the fan based on the value of the humidity sensor. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the bathroom exhaust fan when the bathroom humidty is above 60% + - Make sure the garage does not get too humid + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. | + | `humidity_level` | A `number` selector to set the value used as the trigger. | + | `fan_entity` | A `fan` entity to turn on when the automation fires. | + + The automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When + triggered, the fan should be turned off. The fan should also be stopped when the + sensor goes back down below the desired level. The automation should not try to + turn the fan on if it is already on, and should not turn the fan off when it is + already off. + context: + id: 01KHJQVNQ26E88QG376F6MHB4M + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:51.778965+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bathroom Exhaust Fan + domain: fan + areas: Bathroom + - names: Bathroom Exhaust Fan Humidity + domain: sensor + areas: Bathroom + - names: Bathroom Light + domain: light + areas: Bathroom + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Smart Plug + domain: switch + areas: Kitchen + - names: Smart Plug Energy + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Storage Room Light + domain: light + areas: Storage Room + created: 2026-02-16 08:07:51.780361+00:00 + - role: user + content: | + # Humidity Fan + + ## Problem statement + + Create a blueprint automation to turn on the fan based on the value of the humidity sensor. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the bathroom exhaust fan when the bathroom humidty is above 60% + - Make sure the garage does not get too humid + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. | + | `humidity_level` | A `number` selector to set the value used as the trigger. | + | `fan_entity` | A `fan` entity to turn on when the automation fires. | + + The automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When + triggered, the fan should be turned off. The fan should also be stopped when the + sensor goes back down below the desired level. The automation should not try to + turn the fan on if it is already on, and should not turn the fan off when it is + already off. + attachments: null + created: 2026-02-16 08:07:51.779032+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f94489ebcc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f9428bcbd70>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:51.780372+00:00 + duration_ms: 167.338 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/humidity_fan_humidity_fan-humidity_fan-3.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/humidity_fan_humidity_fan-humidity_fan-3.yaml new file mode 100644 index 000000000..bb08ad0ba --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/humidity_fan_humidity_fan-humidity_fan-3.yaml @@ -0,0 +1,286 @@ +--- +uuid: aff84fc1-ea0a-48c5-afcc-9d822018b857 +task_id: humidity_fan_humidity_fan-humidity_fan-3 +model_id: gemma-3-27b-it +category: fan +task: + input_text: | + # Humidity Fan + + ## Problem statement + + Create a blueprint automation to turn on the fan based on the value of the humidity sensor. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the bathroom exhaust fan when the bathroom humidty is above 60% + - Make sure the garage does not get too humid + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. | + | `humidity_level` | A `number` selector to set the value used as the trigger. | + | `fan_entity` | A `fan` entity to turn on when the automation fires. | + + The automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When + triggered, the fan should be turned off. The fan should also be stopped when the + sensor goes back down below the desired level. The automation should not try to + turn the fan on if it is already on, and should not turn the fan off when it is + already off. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Humidity Fan + + ## Problem statement + + Create a blueprint automation to turn on the fan based on the value of the humidity sensor. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the bathroom exhaust fan when the bathroom humidty is above 60% + - Make sure the garage does not get too humid + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. | + | `humidity_level` | A `number` selector to set the value used as the trigger. | + | `fan_entity` | A `fan` entity to turn on when the automation fires. | + + The automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When + triggered, the fan should be turned off. The fan should also be stopped when the + sensor goes back down below the desired level. The automation should not try to + turn the fan on if it is already on, and should not turn the fan off when it is + already off. + context: + id: 01KHJQVP2B54615D4J3ZBFXCBJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:52.139354+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bathroom Exhaust Fan + domain: fan + areas: Bathroom + - names: Bathroom Exhaust Fan Humidity + domain: sensor + areas: Bathroom + - names: Bathroom Light + domain: light + areas: Bathroom + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Smart Plug + domain: switch + areas: Kitchen + - names: Smart Plug Energy + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Storage Room Light + domain: light + areas: Storage Room + created: 2026-02-16 08:07:52.140777+00:00 + - role: user + content: | + # Humidity Fan + + ## Problem statement + + Create a blueprint automation to turn on the fan based on the value of the humidity sensor. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the bathroom exhaust fan when the bathroom humidty is above 60% + - Make sure the garage does not get too humid + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. | + | `humidity_level` | A `number` selector to set the value used as the trigger. | + | `fan_entity` | A `fan` entity to turn on when the automation fires. | + + The automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When + triggered, the fan should be turned off. The fan should also be stopped when the + sensor goes back down below the desired level. The automation should not try to + turn the fan on if it is already on, and should not turn the fan off when it is + already off. + attachments: null + created: 2026-02-16 08:07:52.139424+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f94490e37f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f9428bcbd70>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:52.140787+00:00 + duration_ms: 174.452 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/humidity_fan_humidity_fan-humidity_fan-4.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/humidity_fan_humidity_fan-humidity_fan-4.yaml new file mode 100644 index 000000000..e3c7315ac --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/humidity_fan_humidity_fan-humidity_fan-4.yaml @@ -0,0 +1,286 @@ +--- +uuid: 7d085c5f-fc8a-4751-8b9b-cae4c6ad2a36 +task_id: humidity_fan_humidity_fan-humidity_fan-4 +model_id: gemma-3-27b-it +category: fan +task: + input_text: | + # Humidity Fan + + ## Problem statement + + Create a blueprint automation to turn on the fan based on the value of the humidity sensor. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the bathroom exhaust fan when the bathroom humidty is above 60% + - Make sure the garage does not get too humid + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. | + | `humidity_level` | A `number` selector to set the value used as the trigger. | + | `fan_entity` | A `fan` entity to turn on when the automation fires. | + + The automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When + triggered, the fan should be turned off. The fan should also be stopped when the + sensor goes back down below the desired level. The automation should not try to + turn the fan on if it is already on, and should not turn the fan off when it is + already off. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Humidity Fan + + ## Problem statement + + Create a blueprint automation to turn on the fan based on the value of the humidity sensor. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the bathroom exhaust fan when the bathroom humidty is above 60% + - Make sure the garage does not get too humid + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. | + | `humidity_level` | A `number` selector to set the value used as the trigger. | + | `fan_entity` | A `fan` entity to turn on when the automation fires. | + + The automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When + triggered, the fan should be turned off. The fan should also be stopped when the + sensor goes back down below the desired level. The automation should not try to + turn the fan on if it is already on, and should not turn the fan off when it is + already off. + context: + id: 01KHJQVPD9KWWTP96NY7FGFN5S + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:52.489182+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bathroom Exhaust Fan + domain: fan + areas: Bathroom + - names: Bathroom Exhaust Fan Humidity + domain: sensor + areas: Bathroom + - names: Bathroom Light + domain: light + areas: Bathroom + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Smart Plug + domain: switch + areas: Kitchen + - names: Smart Plug Energy + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Storage Room Light + domain: light + areas: Storage Room + created: 2026-02-16 08:07:52.491259+00:00 + - role: user + content: | + # Humidity Fan + + ## Problem statement + + Create a blueprint automation to turn on the fan based on the value of the humidity sensor. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the bathroom exhaust fan when the bathroom humidty is above 60% + - Make sure the garage does not get too humid + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. | + | `humidity_level` | A `number` selector to set the value used as the trigger. | + | `fan_entity` | A `fan` entity to turn on when the automation fires. | + + The automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When + triggered, the fan should be turned off. The fan should also be stopped when the + sensor goes back down below the desired level. The automation should not try to + turn the fan on if it is already on, and should not turn the fan off when it is + already off. + attachments: null + created: 2026-02-16 08:07:52.489261+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f94490292d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f9428bcbd70>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:52.491271+00:00 + duration_ms: 168.157 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/light_on_door_light_on_door-light_on_door-0.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/light_on_door_light_on_door-light_on_door-0.yaml new file mode 100644 index 000000000..a1ab61147 --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/light_on_door_light_on_door-light_on_door-0.yaml @@ -0,0 +1,232 @@ +--- +uuid: cb1f507e-2940-4e87-a601-f4bfee3d00c6 +task_id: light_on_door_light_on_door-light_on_door-0 +model_id: gemma-3-27b-it +category: light +task: + input_text: | + # Light on door + + ## Problem statement + + Create an blueprint automation to turn on a light when the door opens. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the pantry light when the pantry door opens + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | -------------- | ------------------------------------------------------------------------------- | + | `door_sensor` | A `binary_sensor` that triggers the automation to start. | + | `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. | + + The automation should trigger when the door opens, and turn the light on. The + light should be shut off when the door closes or after a 2 minute timeout. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Light on door + + ## Problem statement + + Create an blueprint automation to turn on a light when the door opens. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the pantry light when the pantry door opens + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | -------------- | ------------------------------------------------------------------------------- | + | `door_sensor` | A `binary_sensor` that triggers the automation to start. | + | `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. | + + The automation should trigger when the door opens, and turn the light on. The + light should be shut off when the door closes or after a 2 minute timeout. + context: + id: 01KHJQVPRNCD19279BYX4YW8VJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:52.853498+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Lights + domain: light + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Pantry Door + domain: binary_sensor + areas: Kitchen + - names: Pantry Door Battery + domain: binary_sensor + areas: Kitchen + - names: Pantry Door Battery + domain: sensor + areas: Kitchen + - names: Pantry Light + domain: light + areas: Kitchen + - names: Smart Oven + domain: switch + areas: Kitchen + created: 2026-02-16 08:07:52.854496+00:00 + - role: user + content: | + # Light on door + + ## Problem statement + + Create an blueprint automation to turn on a light when the door opens. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the pantry light when the pantry door opens + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | -------------- | ------------------------------------------------------------------------------- | + | `door_sensor` | A `binary_sensor` that triggers the automation to start. | + | `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. | + + The automation should trigger when the door opens, and turn the light on. The + light should be shut off when the door closes or after a 2 minute timeout. + attachments: null + created: 2026-02-16 08:07:52.853567+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:52.854507+00:00 + duration_ms: 173.842 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/light_on_door_light_on_door-light_on_door-1.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/light_on_door_light_on_door-light_on_door-1.yaml new file mode 100644 index 000000000..adc3de9b4 --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/light_on_door_light_on_door-light_on_door-1.yaml @@ -0,0 +1,232 @@ +--- +uuid: a612e233-46a3-49d0-b036-a7c40bbd09a3 +task_id: light_on_door_light_on_door-light_on_door-1 +model_id: gemma-3-27b-it +category: light +task: + input_text: | + # Light on door + + ## Problem statement + + Create an blueprint automation to turn on a light when the door opens. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the pantry light when the pantry door opens + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | -------------- | ------------------------------------------------------------------------------- | + | `door_sensor` | A `binary_sensor` that triggers the automation to start. | + | `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. | + + The automation should trigger when the door opens, and turn the light on. The + light should be shut off when the door closes or after a 2 minute timeout. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Light on door + + ## Problem statement + + Create an blueprint automation to turn on a light when the door opens. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the pantry light when the pantry door opens + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | -------------- | ------------------------------------------------------------------------------- | + | `door_sensor` | A `binary_sensor` that triggers the automation to start. | + | `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. | + + The automation should trigger when the door opens, and turn the light on. The + light should be shut off when the door closes or after a 2 minute timeout. + context: + id: 01KHJQVQ42Q07264568YR3HDHH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:53.218997+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Lights + domain: light + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Pantry Door + domain: binary_sensor + areas: Kitchen + - names: Pantry Door Battery + domain: sensor + areas: Kitchen + - names: Pantry Door Battery + domain: binary_sensor + areas: Kitchen + - names: Pantry Light + domain: light + areas: Kitchen + - names: Smart Oven + domain: switch + areas: Kitchen + created: 2026-02-16 08:07:53.219962+00:00 + - role: user + content: | + # Light on door + + ## Problem statement + + Create an blueprint automation to turn on a light when the door opens. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the pantry light when the pantry door opens + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | -------------- | ------------------------------------------------------------------------------- | + | `door_sensor` | A `binary_sensor` that triggers the automation to start. | + | `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. | + + The automation should trigger when the door opens, and turn the light on. The + light should be shut off when the door closes or after a 2 minute timeout. + attachments: null + created: 2026-02-16 08:07:53.219066+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:53.219974+00:00 + duration_ms: 167.848 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/light_on_door_light_on_door-light_on_door-2.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/light_on_door_light_on_door-light_on_door-2.yaml new file mode 100644 index 000000000..df4661b86 --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/light_on_door_light_on_door-light_on_door-2.yaml @@ -0,0 +1,232 @@ +--- +uuid: 9899336c-acdb-436b-92c9-7552b8fa63fa +task_id: light_on_door_light_on_door-light_on_door-2 +model_id: gemma-3-27b-it +category: light +task: + input_text: | + # Light on door + + ## Problem statement + + Create an blueprint automation to turn on a light when the door opens. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the pantry light when the pantry door opens + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | -------------- | ------------------------------------------------------------------------------- | + | `door_sensor` | A `binary_sensor` that triggers the automation to start. | + | `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. | + + The automation should trigger when the door opens, and turn the light on. The + light should be shut off when the door closes or after a 2 minute timeout. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Light on door + + ## Problem statement + + Create an blueprint automation to turn on a light when the door opens. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the pantry light when the pantry door opens + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | -------------- | ------------------------------------------------------------------------------- | + | `door_sensor` | A `binary_sensor` that triggers the automation to start. | + | `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. | + + The automation should trigger when the door opens, and turn the light on. The + light should be shut off when the door closes or after a 2 minute timeout. + context: + id: 01KHJQVQEMFZPZGMSR6X1G5G61 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:53.556325+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Lights + domain: light + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Pantry Door + domain: binary_sensor + areas: Kitchen + - names: Pantry Door Battery + domain: binary_sensor + areas: Kitchen + - names: Pantry Door Battery + domain: sensor + areas: Kitchen + - names: Pantry Light + domain: light + areas: Kitchen + - names: Smart Oven + domain: switch + areas: Kitchen + created: 2026-02-16 08:07:53.557565+00:00 + - role: user + content: | + # Light on door + + ## Problem statement + + Create an blueprint automation to turn on a light when the door opens. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the pantry light when the pantry door opens + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | -------------- | ------------------------------------------------------------------------------- | + | `door_sensor` | A `binary_sensor` that triggers the automation to start. | + | `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. | + + The automation should trigger when the door opens, and turn the light on. The + light should be shut off when the door closes or after a 2 minute timeout. + attachments: null + created: 2026-02-16 08:07:53.556393+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:53.557576+00:00 + duration_ms: 169.622 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/light_on_door_light_on_door-light_on_door-3.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/light_on_door_light_on_door-light_on_door-3.yaml new file mode 100644 index 000000000..7d32dfb70 --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/light_on_door_light_on_door-light_on_door-3.yaml @@ -0,0 +1,232 @@ +--- +uuid: e4df2d5a-8fdb-41b5-a1bb-7b4f5235d533 +task_id: light_on_door_light_on_door-light_on_door-3 +model_id: gemma-3-27b-it +category: light +task: + input_text: | + # Light on door + + ## Problem statement + + Create an blueprint automation to turn on a light when the door opens. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the pantry light when the pantry door opens + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | -------------- | ------------------------------------------------------------------------------- | + | `door_sensor` | A `binary_sensor` that triggers the automation to start. | + | `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. | + + The automation should trigger when the door opens, and turn the light on. The + light should be shut off when the door closes or after a 2 minute timeout. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Light on door + + ## Problem statement + + Create an blueprint automation to turn on a light when the door opens. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the pantry light when the pantry door opens + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | -------------- | ------------------------------------------------------------------------------- | + | `door_sensor` | A `binary_sensor` that triggers the automation to start. | + | `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. | + + The automation should trigger when the door opens, and turn the light on. The + light should be shut off when the door closes or after a 2 minute timeout. + context: + id: 01KHJQVQT59JQAWH791G1995BB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:53.925844+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garden Lights + domain: light + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Pantry Door + domain: binary_sensor + areas: Kitchen + - names: Pantry Door Battery + domain: binary_sensor + areas: Kitchen + - names: Pantry Door Battery + domain: sensor + areas: Kitchen + - names: Pantry Light + domain: light + areas: Kitchen + - names: Smart Oven + domain: switch + areas: Kitchen + created: 2026-02-16 08:07:53.926798+00:00 + - role: user + content: | + # Light on door + + ## Problem statement + + Create an blueprint automation to turn on a light when the door opens. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the pantry light when the pantry door opens + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | -------------- | ------------------------------------------------------------------------------- | + | `door_sensor` | A `binary_sensor` that triggers the automation to start. | + | `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. | + + The automation should trigger when the door opens, and turn the light on. The + light should be shut off when the door closes or after a 2 minute timeout. + attachments: null + created: 2026-02-16 08:07:53.925911+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:53.926808+00:00 + duration_ms: 310.185 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/light_on_door_light_on_door-light_on_door-4.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/light_on_door_light_on_door-light_on_door-4.yaml new file mode 100644 index 000000000..d3693c898 --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/light_on_door_light_on_door-light_on_door-4.yaml @@ -0,0 +1,232 @@ +--- +uuid: b11f0ad3-3875-4f10-8598-2961ce9c9dfa +task_id: light_on_door_light_on_door-light_on_door-4 +model_id: gemma-3-27b-it +category: light +task: + input_text: | + # Light on door + + ## Problem statement + + Create an blueprint automation to turn on a light when the door opens. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the pantry light when the pantry door opens + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | -------------- | ------------------------------------------------------------------------------- | + | `door_sensor` | A `binary_sensor` that triggers the automation to start. | + | `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. | + + The automation should trigger when the door opens, and turn the light on. The + light should be shut off when the door closes or after a 2 minute timeout. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Light on door + + ## Problem statement + + Create an blueprint automation to turn on a light when the door opens. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the pantry light when the pantry door opens + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | -------------- | ------------------------------------------------------------------------------- | + | `door_sensor` | A `binary_sensor` that triggers the automation to start. | + | `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. | + + The automation should trigger when the door opens, and turn the light on. The + light should be shut off when the door closes or after a 2 minute timeout. + context: + id: 01KHJQVRA1CNF3HKB2ZW8K1PPN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:54.433582+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bedroom 1 Light + domain: light + areas: Bedroom 1 + - names: Bedroom 2 Light + domain: light + areas: Bedroom 2 + - names: Bedroom 3 Light + domain: light + areas: Bedroom 3 + - names: Bedroom 4 Light + domain: light + areas: Bedroom 4 + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Garage Door Opener + domain: light + areas: Garage + - names: Garage Door Opener + domain: cover + areas: Garage + - names: Garden Lights + domain: light + areas: Front yard + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Pantry Door + domain: binary_sensor + areas: Kitchen + - names: Pantry Door Battery + domain: binary_sensor + areas: Kitchen + - names: Pantry Door Battery + domain: sensor + areas: Kitchen + - names: Pantry Light + domain: light + areas: Kitchen + - names: Smart Oven + domain: switch + areas: Kitchen + created: 2026-02-16 08:07:54.435362+00:00 + - role: user + content: | + # Light on door + + ## Problem statement + + Create an blueprint automation to turn on a light when the door opens. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Turn on the pantry light when the pantry door opens + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | -------------- | ------------------------------------------------------------------------------- | + | `door_sensor` | A `binary_sensor` that triggers the automation to start. | + | `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. | + + The automation should trigger when the door opens, and turn the light on. The + light should be shut off when the door closes or after a 2 minute timeout. + attachments: null + created: 2026-02-16 08:07:54.433649+00:00 + tools: + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None), ''position'': All(Coerce(int, msg=None), + Range(min=0, max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''awning'', ''blind'', ''curtain'', + ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', ''window'', + ''water'', ''gas''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:54.435374+00:00 + duration_ms: 171.355 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/vacuum_pause_vacuum_pause-vacuum_pause-0.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/vacuum_pause_vacuum_pause-vacuum_pause-0.yaml new file mode 100644 index 000000000..719dabc5c --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/vacuum_pause_vacuum_pause-vacuum_pause-0.yaml @@ -0,0 +1,310 @@ +--- +uuid: fed69f41-31a0-465f-9e41-7f9e21de95c8 +task_id: vacuum_pause_vacuum_pause-vacuum_pause-0 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: | + # Vacuum Pause + + ## Problem statement + + Create a blueprint automation to pause the vacuum when I receive a phone call. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Pause the vacuum when I get a phone call from my relatives + - Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. | + | `vacuum_entity` | A `vacuum` entity to pause when the automation fires. | + + The automation should trigger when the binary sensor fires and the vacuum is running. The vacuum + should be paused (not stopped) and can be resumed manually. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Vacuum Pause + + ## Problem statement + + Create a blueprint automation to pause the vacuum when I receive a phone call. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Pause the vacuum when I get a phone call from my relatives + - Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. | + | `vacuum_entity` | A `vacuum` entity to pause when the automation fires. | + + The automation should trigger when the binary sensor fires and the vacuum is running. The vacuum + should be paused (not stopped) and can be resumed manually. + context: + id: 01KHJQVRNKV7M69DV0E3064572 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:54.803742+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bathroom Light + domain: light + areas: Bathroom + - names: Exhaust Fan + domain: fan + areas: Bathroom + - names: Exhaust Fan Humidity + domain: sensor + areas: Bathroom + - names: Lamp + domain: light + areas: Living Room/Kitchen/Bedroom + - names: Main Light + domain: light + areas: Living Room/Kitchen/Bedroom + - names: Motion Sensor + domain: binary_sensor + areas: Bathroom + - names: Motion Sensor Battery + domain: binary_sensor + areas: Bathroom + - names: Motion Sensor Battery + domain: sensor + areas: Bathroom + - names: Outdoor Light + domain: light + areas: Rooftop Terrace + - names: Rooftop Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Rooftop Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Rooftop Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Smart Speaker + domain: media_player + areas: Living Room/Kitchen/Bedroom + - names: Thermostat + domain: climate + areas: Living Room/Kitchen/Bedroom + - names: Thermostat Humidity + domain: sensor + areas: Living Room/Kitchen/Bedroom + - names: Thermostat Temperature + domain: sensor + areas: Living Room/Kitchen/Bedroom + - names: Tv + domain: media_player + areas: Living Room/Kitchen/Bedroom + - names: Vacuum Cleaner + domain: vacuum + areas: Living Room/Kitchen/Bedroom + created: 2026-02-16 08:07:54.816831+00:00 + - role: user + content: | + # Vacuum Pause + + ## Problem statement + + Create a blueprint automation to pause the vacuum when I receive a phone call. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Pause the vacuum when I get a phone call from my relatives + - Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. | + | `vacuum_entity` | A `vacuum` entity to pause when the automation fires. | + + The automation should trigger when the binary sensor fires and the vacuum is running. The vacuum + should be paused (not stopped) and can be resumed manually. + attachments: null + created: 2026-02-16 08:07:54.803813+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f9448b0afb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f9428bcbd70>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:54.816850+00:00 + duration_ms: 182.183 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/vacuum_pause_vacuum_pause-vacuum_pause-1.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/vacuum_pause_vacuum_pause-vacuum_pause-1.yaml new file mode 100644 index 000000000..9cbb2262a --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/vacuum_pause_vacuum_pause-vacuum_pause-1.yaml @@ -0,0 +1,310 @@ +--- +uuid: 6d24ec79-a431-48d8-b05b-04a2a9d73e25 +task_id: vacuum_pause_vacuum_pause-vacuum_pause-1 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: | + # Vacuum Pause + + ## Problem statement + + Create a blueprint automation to pause the vacuum when I receive a phone call. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Pause the vacuum when I get a phone call from my relatives + - Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. | + | `vacuum_entity` | A `vacuum` entity to pause when the automation fires. | + + The automation should trigger when the binary sensor fires and the vacuum is running. The vacuum + should be paused (not stopped) and can be resumed manually. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Vacuum Pause + + ## Problem statement + + Create a blueprint automation to pause the vacuum when I receive a phone call. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Pause the vacuum when I get a phone call from my relatives + - Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. | + | `vacuum_entity` | A `vacuum` entity to pause when the automation fires. | + + The automation should trigger when the binary sensor fires and the vacuum is running. The vacuum + should be paused (not stopped) and can be resumed manually. + context: + id: 01KHJQVS0XAGNY4S3Z5F5VRMCV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:55.165467+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bathroom Light + domain: light + areas: Bathroom + - names: Exhaust Fan + domain: fan + areas: Bathroom + - names: Exhaust Fan Humidity + domain: sensor + areas: Bathroom + - names: Lamp + domain: light + areas: Living Room/Kitchen/Bedroom + - names: Main Light + domain: light + areas: Living Room/Kitchen/Bedroom + - names: Motion Sensor + domain: binary_sensor + areas: Bathroom + - names: Motion Sensor Battery + domain: binary_sensor + areas: Bathroom + - names: Motion Sensor Battery + domain: sensor + areas: Bathroom + - names: Outdoor Light + domain: light + areas: Rooftop Terrace + - names: Rooftop Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Rooftop Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Rooftop Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Smart Speaker + domain: media_player + areas: Living Room/Kitchen/Bedroom + - names: Thermostat + domain: climate + areas: Living Room/Kitchen/Bedroom + - names: Thermostat Humidity + domain: sensor + areas: Living Room/Kitchen/Bedroom + - names: Thermostat Temperature + domain: sensor + areas: Living Room/Kitchen/Bedroom + - names: Tv + domain: media_player + areas: Living Room/Kitchen/Bedroom + - names: Vacuum Cleaner + domain: vacuum + areas: Living Room/Kitchen/Bedroom + created: 2026-02-16 08:07:55.167066+00:00 + - role: user + content: | + # Vacuum Pause + + ## Problem statement + + Create a blueprint automation to pause the vacuum when I receive a phone call. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Pause the vacuum when I get a phone call from my relatives + - Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. | + | `vacuum_entity` | A `vacuum` entity to pause when the automation fires. | + + The automation should trigger when the binary sensor fires and the vacuum is running. The vacuum + should be paused (not stopped) and can be resumed manually. + attachments: null + created: 2026-02-16 08:07:55.165551+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f9448fe7060>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f9428bcbd70>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:55.167077+00:00 + duration_ms: 28.683 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/vacuum_pause_vacuum_pause-vacuum_pause-2.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/vacuum_pause_vacuum_pause-vacuum_pause-2.yaml new file mode 100644 index 000000000..b814fe0a6 --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/vacuum_pause_vacuum_pause-vacuum_pause-2.yaml @@ -0,0 +1,310 @@ +--- +uuid: a85369e6-1f4a-439f-9b01-827dc3507b34 +task_id: vacuum_pause_vacuum_pause-vacuum_pause-2 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: | + # Vacuum Pause + + ## Problem statement + + Create a blueprint automation to pause the vacuum when I receive a phone call. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Pause the vacuum when I get a phone call from my relatives + - Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. | + | `vacuum_entity` | A `vacuum` entity to pause when the automation fires. | + + The automation should trigger when the binary sensor fires and the vacuum is running. The vacuum + should be paused (not stopped) and can be resumed manually. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Vacuum Pause + + ## Problem statement + + Create a blueprint automation to pause the vacuum when I receive a phone call. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Pause the vacuum when I get a phone call from my relatives + - Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. | + | `vacuum_entity` | A `vacuum` entity to pause when the automation fires. | + + The automation should trigger when the binary sensor fires and the vacuum is running. The vacuum + should be paused (not stopped) and can be resumed manually. + context: + id: 01KHJQVS7E9MSFR4XSGZPH4H35 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:55.374928+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bathroom Light + domain: light + areas: Bathroom + - names: Exhaust Fan + domain: fan + areas: Bathroom + - names: Exhaust Fan Humidity + domain: sensor + areas: Bathroom + - names: Lamp + domain: light + areas: Living Room/Kitchen/Bedroom + - names: Main Light + domain: light + areas: Living Room/Kitchen/Bedroom + - names: Motion Sensor + domain: binary_sensor + areas: Bathroom + - names: Motion Sensor Battery + domain: binary_sensor + areas: Bathroom + - names: Motion Sensor Battery + domain: sensor + areas: Bathroom + - names: Outdoor Light + domain: light + areas: Rooftop Terrace + - names: Rooftop Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Rooftop Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Rooftop Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Smart Speaker + domain: media_player + areas: Living Room/Kitchen/Bedroom + - names: Thermostat + domain: climate + areas: Living Room/Kitchen/Bedroom + - names: Thermostat Humidity + domain: sensor + areas: Living Room/Kitchen/Bedroom + - names: Thermostat Temperature + domain: sensor + areas: Living Room/Kitchen/Bedroom + - names: Tv + domain: media_player + areas: Living Room/Kitchen/Bedroom + - names: Vacuum Cleaner + domain: vacuum + areas: Living Room/Kitchen/Bedroom + created: 2026-02-16 08:07:55.376645+00:00 + - role: user + content: | + # Vacuum Pause + + ## Problem statement + + Create a blueprint automation to pause the vacuum when I receive a phone call. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Pause the vacuum when I get a phone call from my relatives + - Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. | + | `vacuum_entity` | A `vacuum` entity to pause when the automation fires. | + + The automation should trigger when the binary sensor fires and the vacuum is running. The vacuum + should be paused (not stopped) and can be resumed manually. + attachments: null + created: 2026-02-16 08:07:55.375002+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f942816df30>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f9428bcbd70>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:55.376656+00:00 + duration_ms: 174.337 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/vacuum_pause_vacuum_pause-vacuum_pause-3.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/vacuum_pause_vacuum_pause-vacuum_pause-3.yaml new file mode 100644 index 000000000..69615106e --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/vacuum_pause_vacuum_pause-vacuum_pause-3.yaml @@ -0,0 +1,310 @@ +--- +uuid: 829165e6-7b29-479a-a137-68539a7670a0 +task_id: vacuum_pause_vacuum_pause-vacuum_pause-3 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: | + # Vacuum Pause + + ## Problem statement + + Create a blueprint automation to pause the vacuum when I receive a phone call. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Pause the vacuum when I get a phone call from my relatives + - Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. | + | `vacuum_entity` | A `vacuum` entity to pause when the automation fires. | + + The automation should trigger when the binary sensor fires and the vacuum is running. The vacuum + should be paused (not stopped) and can be resumed manually. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Vacuum Pause + + ## Problem statement + + Create a blueprint automation to pause the vacuum when I receive a phone call. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Pause the vacuum when I get a phone call from my relatives + - Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. | + | `vacuum_entity` | A `vacuum` entity to pause when the automation fires. | + + The automation should trigger when the binary sensor fires and the vacuum is running. The vacuum + should be paused (not stopped) and can be resumed manually. + context: + id: 01KHJQVSJGSQW1NQ44M89YX4ZZ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:55.728603+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bathroom Light + domain: light + areas: Bathroom + - names: Exhaust Fan + domain: fan + areas: Bathroom + - names: Exhaust Fan Humidity + domain: sensor + areas: Bathroom + - names: Lamp + domain: light + areas: Living Room/Kitchen/Bedroom + - names: Main Light + domain: light + areas: Living Room/Kitchen/Bedroom + - names: Motion Sensor + domain: binary_sensor + areas: Bathroom + - names: Motion Sensor Battery + domain: binary_sensor + areas: Bathroom + - names: Motion Sensor Battery + domain: sensor + areas: Bathroom + - names: Outdoor Light + domain: light + areas: Rooftop Terrace + - names: Rooftop Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Rooftop Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Rooftop Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Smart Speaker + domain: media_player + areas: Living Room/Kitchen/Bedroom + - names: Thermostat + domain: climate + areas: Living Room/Kitchen/Bedroom + - names: Thermostat Humidity + domain: sensor + areas: Living Room/Kitchen/Bedroom + - names: Thermostat Temperature + domain: sensor + areas: Living Room/Kitchen/Bedroom + - names: Tv + domain: media_player + areas: Living Room/Kitchen/Bedroom + - names: Vacuum Cleaner + domain: vacuum + areas: Living Room/Kitchen/Bedroom + created: 2026-02-16 08:07:55.730418+00:00 + - role: user + content: | + # Vacuum Pause + + ## Problem statement + + Create a blueprint automation to pause the vacuum when I receive a phone call. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Pause the vacuum when I get a phone call from my relatives + - Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. | + | `vacuum_entity` | A `vacuum` entity to pause when the automation fires. | + + The automation should trigger when the binary sensor fires and the vacuum is running. The vacuum + should be paused (not stopped) and can be resumed manually. + attachments: null + created: 2026-02-16 08:07:55.728670+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f9418e1f110>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f9428bcbd70>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:55.730430+00:00 + duration_ms: 172.408 + tries: 1 diff --git a/reports/automations/2026.2.2/gemma-3-27b-it/vacuum_pause_vacuum_pause-vacuum_pause-4.yaml b/reports/automations/2026.2.2/gemma-3-27b-it/vacuum_pause_vacuum_pause-vacuum_pause-4.yaml new file mode 100644 index 000000000..9c177a616 --- /dev/null +++ b/reports/automations/2026.2.2/gemma-3-27b-it/vacuum_pause_vacuum_pause-vacuum_pause-4.yaml @@ -0,0 +1,310 @@ +--- +uuid: 9f09061f-86d5-4a65-9245-783052e51d6f +task_id: vacuum_pause_vacuum_pause-vacuum_pause-4 +model_id: gemma-3-27b-it +category: vacuum +task: + input_text: | + # Vacuum Pause + + ## Problem statement + + Create a blueprint automation to pause the vacuum when I receive a phone call. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Pause the vacuum when I get a phone call from my relatives + - Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. | + | `vacuum_entity` | A `vacuum` entity to pause when the automation fires. | + + The automation should trigger when the binary sensor fires and the vacuum is running. The vacuum + should be paused (not stopped) and can be resumed manually. +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: | + # Vacuum Pause + + ## Problem statement + + Create a blueprint automation to pause the vacuum when I receive a phone call. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Pause the vacuum when I get a phone call from my relatives + - Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. | + | `vacuum_entity` | A `vacuum` entity to pause when the automation fires. | + + The automation should trigger when the binary sensor fires and the vacuum is running. The vacuum + should be paused (not stopped) and can be resumed manually. + context: + id: 01KHJQVSYTE02SXTS3MKBMR1QW + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:56.122317+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Bathroom Light + domain: light + areas: Bathroom + - names: Exhaust Fan + domain: fan + areas: Bathroom + - names: Exhaust Fan Humidity + domain: sensor + areas: Bathroom + - names: Lamp + domain: light + areas: Living Room/Kitchen/Bedroom + - names: Main Light + domain: light + areas: Living Room/Kitchen/Bedroom + - names: Motion Sensor + domain: binary_sensor + areas: Bathroom + - names: Motion Sensor Battery + domain: binary_sensor + areas: Bathroom + - names: Motion Sensor Battery + domain: sensor + areas: Bathroom + - names: Outdoor Light + domain: light + areas: Rooftop Terrace + - names: Rooftop Terrace Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Rooftop Terrace Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Rooftop Terrace Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Smart Speaker + domain: media_player + areas: Living Room/Kitchen/Bedroom + - names: Thermostat + domain: climate + areas: Living Room/Kitchen/Bedroom + - names: Thermostat Humidity + domain: sensor + areas: Living Room/Kitchen/Bedroom + - names: Thermostat Temperature + domain: sensor + areas: Living Room/Kitchen/Bedroom + - names: Tv + domain: media_player + areas: Living Room/Kitchen/Bedroom + - names: Vacuum Cleaner + domain: vacuum + areas: Living Room/Kitchen/Bedroom + created: 2026-02-16 08:07:56.130169+00:00 + - role: user + content: | + # Vacuum Pause + + ## Problem statement + + Create a blueprint automation to pause the vacuum when I receive a phone call. + + ## Example use cases + + These are example use cases that could be used with the blueprint: + + - Pause the vacuum when I get a phone call from my relatives + - Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts + + ## Detailed Description + + The blueprint should accept two inputs: + + | Input | Description | + | ------------------- | --------------------------------------------------------------------- | + | `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. | + | `vacuum_entity` | A `vacuum` entity to pause when the automation fires. | + + The automation should trigger when the binary sensor fires and the vacuum is running. The vacuum + should be paused (not stopped) and can be resumed manually. + attachments: null + created: 2026-02-16 08:07:56.122410+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f9448f68300>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f9428bcbd70>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassFanSetSpeed + description: Sets a fan's speed by percentage + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''fan''])], msg=None), ''percentage'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassVacuumStart + description: Starts a vacuum + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassVacuumReturnToBase + description: Returns a vacuum to base + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''vacuum''])], msg=None)}' + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''outlet'', ''switch'', ''awning'', + ''blind'', ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', + ''shutter'', ''window'', ''water'', ''gas'', ''identify'', ''restart'', + ''update'', ''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:56.130185+00:00 + duration_ms: 35.072 + tries: 1 diff --git a/reports/automations/2026.2.2/report.csv b/reports/automations/2026.2.2/report.csv new file mode 100644 index 000000000..00b1cd5ad --- /dev/null +++ b/reports/automations/2026.2.2/report.csv @@ -0,0 +1,61 @@ +task_id,model_id,category,text,tool_call,response,task_name,label,details +"door_left_open_door_left_open-door_left_open-0","gemma-3-27b-it","media_player","# Door left open\n\n## Problem statement\n\nCreate an blueprint automation to play an alert on the speaker when the door is\nleft open.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Play an message on the living room speaker if the garage door has been left open for more than 30 minutes\n- When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker.\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Selector Type | Description |\n| --------------- | ------------- | ------------------------------------------------------------------------------------- |\n| `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. |\n| `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. |\n| `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. |\n\nThe automation should play the selected media when the door has been open for the specified duration.\n","","Error talking to API","door_left_open-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"door_left_open_door_left_open-door_left_open-0","gemma-3-27b-it","media_player","# Door left open\n\n## Problem statement\n\nCreate an blueprint automation to play an alert on the speaker when the door is\nleft open.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Play an message on the living room speaker if the garage door has been left open for more than 30 minutes\n- When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker.\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Selector Type | Description |\n| --------------- | ------------- | ------------------------------------------------------------------------------------- |\n| `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. |\n| `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. |\n| `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. |\n\nThe automation should play the selected media when the door has been open for the specified duration.\n","","Error talking to API","door_left_open-test_door_open_plays_media","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"door_left_open_door_left_open-door_left_open-1","gemma-3-27b-it","media_player","# Door left open\n\n## Problem statement\n\nCreate an blueprint automation to play an alert on the speaker when the door is\nleft open.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Play an message on the living room speaker if the garage door has been left open for more than 30 minutes\n- When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker.\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Selector Type | Description |\n| --------------- | ------------- | ------------------------------------------------------------------------------------- |\n| `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. |\n| `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. |\n| `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. |\n\nThe automation should play the selected media when the door has been open for the specified duration.\n","","Error talking to API","door_left_open-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"door_left_open_door_left_open-door_left_open-1","gemma-3-27b-it","media_player","# Door left open\n\n## Problem statement\n\nCreate an blueprint automation to play an alert on the speaker when the door is\nleft open.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Play an message on the living room speaker if the garage door has been left open for more than 30 minutes\n- When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker.\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Selector Type | Description |\n| --------------- | ------------- | ------------------------------------------------------------------------------------- |\n| `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. |\n| `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. |\n| `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. |\n\nThe automation should play the selected media when the door has been open for the specified duration.\n","","Error talking to API","door_left_open-test_door_open_plays_media","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"door_left_open_door_left_open-door_left_open-2","gemma-3-27b-it","media_player","# Door left open\n\n## Problem statement\n\nCreate an blueprint automation to play an alert on the speaker when the door is\nleft open.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Play an message on the living room speaker if the garage door has been left open for more than 30 minutes\n- When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker.\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Selector Type | Description |\n| --------------- | ------------- | ------------------------------------------------------------------------------------- |\n| `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. |\n| `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. |\n| `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. |\n\nThe automation should play the selected media when the door has been open for the specified duration.\n","","Error talking to API","door_left_open-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"door_left_open_door_left_open-door_left_open-2","gemma-3-27b-it","media_player","# Door left open\n\n## Problem statement\n\nCreate an blueprint automation to play an alert on the speaker when the door is\nleft open.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Play an message on the living room speaker if the garage door has been left open for more than 30 minutes\n- When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker.\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Selector Type | Description |\n| --------------- | ------------- | ------------------------------------------------------------------------------------- |\n| `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. |\n| `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. |\n| `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. |\n\nThe automation should play the selected media when the door has been open for the specified duration.\n","","Error talking to API","door_left_open-test_door_open_plays_media","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"door_left_open_door_left_open-door_left_open-3","gemma-3-27b-it","media_player","# Door left open\n\n## Problem statement\n\nCreate an blueprint automation to play an alert on the speaker when the door is\nleft open.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Play an message on the living room speaker if the garage door has been left open for more than 30 minutes\n- When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker.\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Selector Type | Description |\n| --------------- | ------------- | ------------------------------------------------------------------------------------- |\n| `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. |\n| `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. |\n| `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. |\n\nThe automation should play the selected media when the door has been open for the specified duration.\n","","Error talking to API","door_left_open-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"door_left_open_door_left_open-door_left_open-3","gemma-3-27b-it","media_player","# Door left open\n\n## Problem statement\n\nCreate an blueprint automation to play an alert on the speaker when the door is\nleft open.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Play an message on the living room speaker if the garage door has been left open for more than 30 minutes\n- When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker.\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Selector Type | Description |\n| --------------- | ------------- | ------------------------------------------------------------------------------------- |\n| `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. |\n| `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. |\n| `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. |\n\nThe automation should play the selected media when the door has been open for the specified duration.\n","","Error talking to API","door_left_open-test_door_open_plays_media","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"door_left_open_door_left_open-door_left_open-4","gemma-3-27b-it","media_player","# Door left open\n\n## Problem statement\n\nCreate an blueprint automation to play an alert on the speaker when the door is\nleft open.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Play an message on the living room speaker if the garage door has been left open for more than 30 minutes\n- When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker.\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Selector Type | Description |\n| --------------- | ------------- | ------------------------------------------------------------------------------------- |\n| `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. |\n| `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. |\n| `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. |\n\nThe automation should play the selected media when the door has been open for the specified duration.\n","","Error talking to API","door_left_open-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"door_left_open_door_left_open-door_left_open-4","gemma-3-27b-it","media_player","# Door left open\n\n## Problem statement\n\nCreate an blueprint automation to play an alert on the speaker when the door is\nleft open.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Play an message on the living room speaker if the garage door has been left open for more than 30 minutes\n- When the front door is ajar for 5 minutes, play a sound on the kitchen bluetooth speaker.\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Selector Type | Description |\n| --------------- | ------------- | ------------------------------------------------------------------------------------- |\n| `door_sensor` | `entity` | A `binary_sensor` entity selector door sensor that is the target of the automation. |\n| `alert_media` | `media` | The target for the `media` selector for the media player and media id with the alter. |\n| `open_duration` | `duration` | A `duration` selector, the amount of time before playing the alert. |\n\nThe automation should play the selected media when the door has been open for the specified duration.\n","","Error talking to API","door_left_open-test_door_open_plays_media","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"humidity_fan_humidity_fan-humidity_fan-0","gemma-3-27b-it","fan","# Humidity Fan\n\n## Problem statement\n\nCreate a blueprint automation to turn on the fan based on the value of the humidity sensor.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the bathroom exhaust fan when the bathroom humidty is above 60%\n- Make sure the garage does not get too humid\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. |\n| `humidity_level` | A `number` selector to set the value used as the trigger. |\n| `fan_entity` | A `fan` entity to turn on when the automation fires. |\n\nThe automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When\ntriggered, the fan should be turned off. The fan should also be stopped when the\nsensor goes back down below the desired level. The automation should not try to\nturn the fan on if it is already on, and should not turn the fan off when it is\nalready off.\n","","Error talking to API","humidity_fan-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"humidity_fan_humidity_fan-humidity_fan-0","gemma-3-27b-it","fan","# Humidity Fan\n\n## Problem statement\n\nCreate a blueprint automation to turn on the fan based on the value of the humidity sensor.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the bathroom exhaust fan when the bathroom humidty is above 60%\n- Make sure the garage does not get too humid\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. |\n| `humidity_level` | A `number` selector to set the value used as the trigger. |\n| `fan_entity` | A `fan` entity to turn on when the automation fires. |\n\nThe automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When\ntriggered, the fan should be turned off. The fan should also be stopped when the\nsensor goes back down below the desired level. The automation should not try to\nturn the fan on if it is already on, and should not turn the fan off when it is\nalready off.\n","","Error talking to API","humidity_fan-test_fan_triggered_on","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"humidity_fan_humidity_fan-humidity_fan-0","gemma-3-27b-it","fan","# Humidity Fan\n\n## Problem statement\n\nCreate a blueprint automation to turn on the fan based on the value of the humidity sensor.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the bathroom exhaust fan when the bathroom humidty is above 60%\n- Make sure the garage does not get too humid\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. |\n| `humidity_level` | A `number` selector to set the value used as the trigger. |\n| `fan_entity` | A `fan` entity to turn on when the automation fires. |\n\nThe automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When\ntriggered, the fan should be turned off. The fan should also be stopped when the\nsensor goes back down below the desired level. The automation should not try to\nturn the fan on if it is already on, and should not turn the fan off when it is\nalready off.\n","","Error talking to API","humidity_fan-test_fan_triggered_off","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"humidity_fan_humidity_fan-humidity_fan-1","gemma-3-27b-it","fan","# Humidity Fan\n\n## Problem statement\n\nCreate a blueprint automation to turn on the fan based on the value of the humidity sensor.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the bathroom exhaust fan when the bathroom humidty is above 60%\n- Make sure the garage does not get too humid\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. |\n| `humidity_level` | A `number` selector to set the value used as the trigger. |\n| `fan_entity` | A `fan` entity to turn on when the automation fires. |\n\nThe automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When\ntriggered, the fan should be turned off. The fan should also be stopped when the\nsensor goes back down below the desired level. The automation should not try to\nturn the fan on if it is already on, and should not turn the fan off when it is\nalready off.\n","","Error talking to API","humidity_fan-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"humidity_fan_humidity_fan-humidity_fan-1","gemma-3-27b-it","fan","# Humidity Fan\n\n## Problem statement\n\nCreate a blueprint automation to turn on the fan based on the value of the humidity sensor.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the bathroom exhaust fan when the bathroom humidty is above 60%\n- Make sure the garage does not get too humid\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. |\n| `humidity_level` | A `number` selector to set the value used as the trigger. |\n| `fan_entity` | A `fan` entity to turn on when the automation fires. |\n\nThe automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When\ntriggered, the fan should be turned off. The fan should also be stopped when the\nsensor goes back down below the desired level. The automation should not try to\nturn the fan on if it is already on, and should not turn the fan off when it is\nalready off.\n","","Error talking to API","humidity_fan-test_fan_triggered_on","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"humidity_fan_humidity_fan-humidity_fan-1","gemma-3-27b-it","fan","# Humidity Fan\n\n## Problem statement\n\nCreate a blueprint automation to turn on the fan based on the value of the humidity sensor.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the bathroom exhaust fan when the bathroom humidty is above 60%\n- Make sure the garage does not get too humid\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. |\n| `humidity_level` | A `number` selector to set the value used as the trigger. |\n| `fan_entity` | A `fan` entity to turn on when the automation fires. |\n\nThe automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When\ntriggered, the fan should be turned off. The fan should also be stopped when the\nsensor goes back down below the desired level. The automation should not try to\nturn the fan on if it is already on, and should not turn the fan off when it is\nalready off.\n","","Error talking to API","humidity_fan-test_fan_triggered_off","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"humidity_fan_humidity_fan-humidity_fan-2","gemma-3-27b-it","fan","# Humidity Fan\n\n## Problem statement\n\nCreate a blueprint automation to turn on the fan based on the value of the humidity sensor.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the bathroom exhaust fan when the bathroom humidty is above 60%\n- Make sure the garage does not get too humid\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. |\n| `humidity_level` | A `number` selector to set the value used as the trigger. |\n| `fan_entity` | A `fan` entity to turn on when the automation fires. |\n\nThe automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When\ntriggered, the fan should be turned off. The fan should also be stopped when the\nsensor goes back down below the desired level. The automation should not try to\nturn the fan on if it is already on, and should not turn the fan off when it is\nalready off.\n","","Error talking to API","humidity_fan-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"humidity_fan_humidity_fan-humidity_fan-2","gemma-3-27b-it","fan","# Humidity Fan\n\n## Problem statement\n\nCreate a blueprint automation to turn on the fan based on the value of the humidity sensor.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the bathroom exhaust fan when the bathroom humidty is above 60%\n- Make sure the garage does not get too humid\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. |\n| `humidity_level` | A `number` selector to set the value used as the trigger. |\n| `fan_entity` | A `fan` entity to turn on when the automation fires. |\n\nThe automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When\ntriggered, the fan should be turned off. The fan should also be stopped when the\nsensor goes back down below the desired level. The automation should not try to\nturn the fan on if it is already on, and should not turn the fan off when it is\nalready off.\n","","Error talking to API","humidity_fan-test_fan_triggered_on","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"humidity_fan_humidity_fan-humidity_fan-2","gemma-3-27b-it","fan","# Humidity Fan\n\n## Problem statement\n\nCreate a blueprint automation to turn on the fan based on the value of the humidity sensor.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the bathroom exhaust fan when the bathroom humidty is above 60%\n- Make sure the garage does not get too humid\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. |\n| `humidity_level` | A `number` selector to set the value used as the trigger. |\n| `fan_entity` | A `fan` entity to turn on when the automation fires. |\n\nThe automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When\ntriggered, the fan should be turned off. The fan should also be stopped when the\nsensor goes back down below the desired level. The automation should not try to\nturn the fan on if it is already on, and should not turn the fan off when it is\nalready off.\n","","Error talking to API","humidity_fan-test_fan_triggered_off","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"humidity_fan_humidity_fan-humidity_fan-3","gemma-3-27b-it","fan","# Humidity Fan\n\n## Problem statement\n\nCreate a blueprint automation to turn on the fan based on the value of the humidity sensor.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the bathroom exhaust fan when the bathroom humidty is above 60%\n- Make sure the garage does not get too humid\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. |\n| `humidity_level` | A `number` selector to set the value used as the trigger. |\n| `fan_entity` | A `fan` entity to turn on when the automation fires. |\n\nThe automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When\ntriggered, the fan should be turned off. The fan should also be stopped when the\nsensor goes back down below the desired level. The automation should not try to\nturn the fan on if it is already on, and should not turn the fan off when it is\nalready off.\n","","Error talking to API","humidity_fan-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"humidity_fan_humidity_fan-humidity_fan-3","gemma-3-27b-it","fan","# Humidity Fan\n\n## Problem statement\n\nCreate a blueprint automation to turn on the fan based on the value of the humidity sensor.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the bathroom exhaust fan when the bathroom humidty is above 60%\n- Make sure the garage does not get too humid\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. |\n| `humidity_level` | A `number` selector to set the value used as the trigger. |\n| `fan_entity` | A `fan` entity to turn on when the automation fires. |\n\nThe automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When\ntriggered, the fan should be turned off. The fan should also be stopped when the\nsensor goes back down below the desired level. The automation should not try to\nturn the fan on if it is already on, and should not turn the fan off when it is\nalready off.\n","","Error talking to API","humidity_fan-test_fan_triggered_on","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"humidity_fan_humidity_fan-humidity_fan-3","gemma-3-27b-it","fan","# Humidity Fan\n\n## Problem statement\n\nCreate a blueprint automation to turn on the fan based on the value of the humidity sensor.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the bathroom exhaust fan when the bathroom humidty is above 60%\n- Make sure the garage does not get too humid\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. |\n| `humidity_level` | A `number` selector to set the value used as the trigger. |\n| `fan_entity` | A `fan` entity to turn on when the automation fires. |\n\nThe automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When\ntriggered, the fan should be turned off. The fan should also be stopped when the\nsensor goes back down below the desired level. The automation should not try to\nturn the fan on if it is already on, and should not turn the fan off when it is\nalready off.\n","","Error talking to API","humidity_fan-test_fan_triggered_off","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"humidity_fan_humidity_fan-humidity_fan-4","gemma-3-27b-it","fan","# Humidity Fan\n\n## Problem statement\n\nCreate a blueprint automation to turn on the fan based on the value of the humidity sensor.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the bathroom exhaust fan when the bathroom humidty is above 60%\n- Make sure the garage does not get too humid\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. |\n| `humidity_level` | A `number` selector to set the value used as the trigger. |\n| `fan_entity` | A `fan` entity to turn on when the automation fires. |\n\nThe automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When\ntriggered, the fan should be turned off. The fan should also be stopped when the\nsensor goes back down below the desired level. The automation should not try to\nturn the fan on if it is already on, and should not turn the fan off when it is\nalready off.\n","","Error talking to API","humidity_fan-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"humidity_fan_humidity_fan-humidity_fan-4","gemma-3-27b-it","fan","# Humidity Fan\n\n## Problem statement\n\nCreate a blueprint automation to turn on the fan based on the value of the humidity sensor.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the bathroom exhaust fan when the bathroom humidty is above 60%\n- Make sure the garage does not get too humid\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. |\n| `humidity_level` | A `number` selector to set the value used as the trigger. |\n| `fan_entity` | A `fan` entity to turn on when the automation fires. |\n\nThe automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When\ntriggered, the fan should be turned off. The fan should also be stopped when the\nsensor goes back down below the desired level. The automation should not try to\nturn the fan on if it is already on, and should not turn the fan off when it is\nalready off.\n","","Error talking to API","humidity_fan-test_fan_triggered_on","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"humidity_fan_humidity_fan-humidity_fan-4","gemma-3-27b-it","fan","# Humidity Fan\n\n## Problem statement\n\nCreate a blueprint automation to turn on the fan based on the value of the humidity sensor.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the bathroom exhaust fan when the bathroom humidty is above 60%\n- Make sure the garage does not get too humid\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `humidity_sensor` | A `sensor` entity that is a humidty sensor that triggers the automation. |\n| `humidity_level` | A `number` selector to set the value used as the trigger. |\n| `fan_entity` | A `fan` entity to turn on when the automation fires. |\n\nThe automation should trigger when the `humidity_sensor` level goes above `humidity_level`. When\ntriggered, the fan should be turned off. The fan should also be stopped when the\nsensor goes back down below the desired level. The automation should not try to\nturn the fan on if it is already on, and should not turn the fan off when it is\nalready off.\n","","Error talking to API","humidity_fan-test_fan_triggered_off","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"light_on_door_light_on_door-light_on_door-0","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"light_on_door_light_on_door-light_on_door-0","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_door_open","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"light_on_door_light_on_door-light_on_door-0","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_door_open_close","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"light_on_door_light_on_door-light_on_door-0","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_light_timeout","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"light_on_door_light_on_door-light_on_door-1","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"light_on_door_light_on_door-light_on_door-1","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_door_open","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"light_on_door_light_on_door-light_on_door-1","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_door_open_close","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"light_on_door_light_on_door-light_on_door-1","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_light_timeout","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"light_on_door_light_on_door-light_on_door-2","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"light_on_door_light_on_door-light_on_door-2","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_door_open","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"light_on_door_light_on_door-light_on_door-2","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_door_open_close","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"light_on_door_light_on_door-light_on_door-2","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_light_timeout","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"light_on_door_light_on_door-light_on_door-3","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"light_on_door_light_on_door-light_on_door-3","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_door_open","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"light_on_door_light_on_door-light_on_door-3","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_door_open_close","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"light_on_door_light_on_door-light_on_door-3","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_light_timeout","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"light_on_door_light_on_door-light_on_door-4","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"light_on_door_light_on_door-light_on_door-4","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_door_open","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"light_on_door_light_on_door-light_on_door-4","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_door_open_close","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"light_on_door_light_on_door-light_on_door-4","gemma-3-27b-it","light","# Light on door\n\n## Problem statement\n\nCreate an blueprint automation to turn on a light when the door opens.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Turn on the pantry light when the pantry door opens\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| -------------- | ------------------------------------------------------------------------------- |\n| `door_sensor` | A `binary_sensor` that triggers the automation to start. |\n| `light_switch` | One or more `light` entity targets to turn on or off when the automation fires. |\n\nThe automation should trigger when the door opens, and turn the light on. The\nlight should be shut off when the door closes or after a 2 minute timeout.\n","","Error talking to API","light_on_door-test_light_timeout","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"vacuum_pause_vacuum_pause-vacuum_pause-0","gemma-3-27b-it","vacuum","# Vacuum Pause\n\n## Problem statement\n\nCreate a blueprint automation to pause the vacuum when I receive a phone call.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Pause the vacuum when I get a phone call from my relatives\n- Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. |\n| `vacuum_entity` | A `vacuum` entity to pause when the automation fires. |\n\nThe automation should trigger when the binary sensor fires and the vacuum is running. The vacuum\nshould be paused (not stopped) and can be resumed manually.\n","","Error talking to API","vacuum_pause-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"vacuum_pause_vacuum_pause-vacuum_pause-0","gemma-3-27b-it","vacuum","# Vacuum Pause\n\n## Problem statement\n\nCreate a blueprint automation to pause the vacuum when I receive a phone call.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Pause the vacuum when I get a phone call from my relatives\n- Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. |\n| `vacuum_entity` | A `vacuum` entity to pause when the automation fires. |\n\nThe automation should trigger when the binary sensor fires and the vacuum is running. The vacuum\nshould be paused (not stopped) and can be resumed manually.\n","","Error talking to API","vacuum_pause-test_vacuum_running_and_paused","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"vacuum_pause_vacuum_pause-vacuum_pause-0","gemma-3-27b-it","vacuum","# Vacuum Pause\n\n## Problem statement\n\nCreate a blueprint automation to pause the vacuum when I receive a phone call.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Pause the vacuum when I get a phone call from my relatives\n- Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. |\n| `vacuum_entity` | A `vacuum` entity to pause when the automation fires. |\n\nThe automation should trigger when the binary sensor fires and the vacuum is running. The vacuum\nshould be paused (not stopped) and can be resumed manually.\n","","Error talking to API","vacuum_pause-test_vacuum_not_changed_when_idle","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"vacuum_pause_vacuum_pause-vacuum_pause-1","gemma-3-27b-it","vacuum","# Vacuum Pause\n\n## Problem statement\n\nCreate a blueprint automation to pause the vacuum when I receive a phone call.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Pause the vacuum when I get a phone call from my relatives\n- Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. |\n| `vacuum_entity` | A `vacuum` entity to pause when the automation fires. |\n\nThe automation should trigger when the binary sensor fires and the vacuum is running. The vacuum\nshould be paused (not stopped) and can be resumed manually.\n","","Error talking to API","vacuum_pause-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"vacuum_pause_vacuum_pause-vacuum_pause-1","gemma-3-27b-it","vacuum","# Vacuum Pause\n\n## Problem statement\n\nCreate a blueprint automation to pause the vacuum when I receive a phone call.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Pause the vacuum when I get a phone call from my relatives\n- Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. |\n| `vacuum_entity` | A `vacuum` entity to pause when the automation fires. |\n\nThe automation should trigger when the binary sensor fires and the vacuum is running. The vacuum\nshould be paused (not stopped) and can be resumed manually.\n","","Error talking to API","vacuum_pause-test_vacuum_running_and_paused","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"vacuum_pause_vacuum_pause-vacuum_pause-1","gemma-3-27b-it","vacuum","# Vacuum Pause\n\n## Problem statement\n\nCreate a blueprint automation to pause the vacuum when I receive a phone call.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Pause the vacuum when I get a phone call from my relatives\n- Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. |\n| `vacuum_entity` | A `vacuum` entity to pause when the automation fires. |\n\nThe automation should trigger when the binary sensor fires and the vacuum is running. The vacuum\nshould be paused (not stopped) and can be resumed manually.\n","","Error talking to API","vacuum_pause-test_vacuum_not_changed_when_idle","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"vacuum_pause_vacuum_pause-vacuum_pause-2","gemma-3-27b-it","vacuum","# Vacuum Pause\n\n## Problem statement\n\nCreate a blueprint automation to pause the vacuum when I receive a phone call.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Pause the vacuum when I get a phone call from my relatives\n- Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. |\n| `vacuum_entity` | A `vacuum` entity to pause when the automation fires. |\n\nThe automation should trigger when the binary sensor fires and the vacuum is running. The vacuum\nshould be paused (not stopped) and can be resumed manually.\n","","Error talking to API","vacuum_pause-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"vacuum_pause_vacuum_pause-vacuum_pause-2","gemma-3-27b-it","vacuum","# Vacuum Pause\n\n## Problem statement\n\nCreate a blueprint automation to pause the vacuum when I receive a phone call.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Pause the vacuum when I get a phone call from my relatives\n- Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. |\n| `vacuum_entity` | A `vacuum` entity to pause when the automation fires. |\n\nThe automation should trigger when the binary sensor fires and the vacuum is running. The vacuum\nshould be paused (not stopped) and can be resumed manually.\n","","Error talking to API","vacuum_pause-test_vacuum_running_and_paused","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"vacuum_pause_vacuum_pause-vacuum_pause-2","gemma-3-27b-it","vacuum","# Vacuum Pause\n\n## Problem statement\n\nCreate a blueprint automation to pause the vacuum when I receive a phone call.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Pause the vacuum when I get a phone call from my relatives\n- Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. |\n| `vacuum_entity` | A `vacuum` entity to pause when the automation fires. |\n\nThe automation should trigger when the binary sensor fires and the vacuum is running. The vacuum\nshould be paused (not stopped) and can be resumed manually.\n","","Error talking to API","vacuum_pause-test_vacuum_not_changed_when_idle","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"vacuum_pause_vacuum_pause-vacuum_pause-3","gemma-3-27b-it","vacuum","# Vacuum Pause\n\n## Problem statement\n\nCreate a blueprint automation to pause the vacuum when I receive a phone call.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Pause the vacuum when I get a phone call from my relatives\n- Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. |\n| `vacuum_entity` | A `vacuum` entity to pause when the automation fires. |\n\nThe automation should trigger when the binary sensor fires and the vacuum is running. The vacuum\nshould be paused (not stopped) and can be resumed manually.\n","","Error talking to API","vacuum_pause-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"vacuum_pause_vacuum_pause-vacuum_pause-3","gemma-3-27b-it","vacuum","# Vacuum Pause\n\n## Problem statement\n\nCreate a blueprint automation to pause the vacuum when I receive a phone call.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Pause the vacuum when I get a phone call from my relatives\n- Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. |\n| `vacuum_entity` | A `vacuum` entity to pause when the automation fires. |\n\nThe automation should trigger when the binary sensor fires and the vacuum is running. The vacuum\nshould be paused (not stopped) and can be resumed manually.\n","","Error talking to API","vacuum_pause-test_vacuum_running_and_paused","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"vacuum_pause_vacuum_pause-vacuum_pause-3","gemma-3-27b-it","vacuum","# Vacuum Pause\n\n## Problem statement\n\nCreate a blueprint automation to pause the vacuum when I receive a phone call.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Pause the vacuum when I get a phone call from my relatives\n- Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. |\n| `vacuum_entity` | A `vacuum` entity to pause when the automation fires. |\n\nThe automation should trigger when the binary sensor fires and the vacuum is running. The vacuum\nshould be paused (not stopped) and can be resumed manually.\n","","Error talking to API","vacuum_pause-test_vacuum_not_changed_when_idle","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"vacuum_pause_vacuum_pause-vacuum_pause-4","gemma-3-27b-it","vacuum","# Vacuum Pause\n\n## Problem statement\n\nCreate a blueprint automation to pause the vacuum when I receive a phone call.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Pause the vacuum when I get a phone call from my relatives\n- Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. |\n| `vacuum_entity` | A `vacuum` entity to pause when the automation fires. |\n\nThe automation should trigger when the binary sensor fires and the vacuum is running. The vacuum\nshould be paused (not stopped) and can be resumed manually.\n","","Error talking to API","vacuum_pause-test_blueprint_inputs","Bad","AssertionError: Blueprint content is not available" +"vacuum_pause_vacuum_pause-vacuum_pause-4","gemma-3-27b-it","vacuum","# Vacuum Pause\n\n## Problem statement\n\nCreate a blueprint automation to pause the vacuum when I receive a phone call.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Pause the vacuum when I get a phone call from my relatives\n- Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. |\n| `vacuum_entity` | A `vacuum` entity to pause when the automation fires. |\n\nThe automation should trigger when the binary sensor fires and the vacuum is running. The vacuum\nshould be paused (not stopped) and can be resumed manually.\n","","Error talking to API","vacuum_pause-test_vacuum_running_and_paused","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" +"vacuum_pause_vacuum_pause-vacuum_pause-4","gemma-3-27b-it","vacuum","# Vacuum Pause\n\n## Problem statement\n\nCreate a blueprint automation to pause the vacuum when I receive a phone call.\n\n## Example use cases\n\nThese are example use cases that could be used with the blueprint:\n\n- Pause the vacuum when I get a phone call from my relatives\n- Don't interrupt my work video convererence call with vacuum noise by pausing it when the call starts\n\n## Detailed Description\n\nThe blueprint should accept two inputs:\n\n| Input | Description |\n| ------------------- | --------------------------------------------------------------------- |\n| `phone_call_sensor` | A `binary_sensor` entity that triggers when a phone call is received. |\n| `vacuum_entity` | A `vacuum` entity to pause when the automation fires. |\n\nThe automation should trigger when the binary sensor fires and the vacuum is running. The vacuum\nshould be paused (not stopped) and can be resumed manually.\n","","Error talking to API","vacuum_pause-test_vacuum_not_changed_when_idle","Bad","AssertionError: Could not extract YAML from model response: Error talking to API" diff --git a/reports/automations/2026.2.2/reports-by-category.yaml b/reports/automations/2026.2.2/reports-by-category.yaml new file mode 100644 index 000000000..0f89cd8c2 --- /dev/null +++ b/reports/automations/2026.2.2/reports-by-category.yaml @@ -0,0 +1,22 @@ +--- +- category: fan + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 15 +- category: light + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 20 +- category: media_player + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 10 +- category: vacuum + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 15 + diff --git a/reports/automations/2026.2.2/reports-by-model-category.yaml b/reports/automations/2026.2.2/reports-by-model-category.yaml new file mode 100644 index 000000000..b48178a22 --- /dev/null +++ b/reports/automations/2026.2.2/reports-by-model-category.yaml @@ -0,0 +1,22 @@ +--- +- model_id-category: gemma-3-27b-it-fan + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 15 +- model_id-category: gemma-3-27b-it-light + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 20 +- model_id-category: gemma-3-27b-it-media_player + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 10 +- model_id-category: gemma-3-27b-it-vacuum + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 15 + diff --git a/reports/automations/2026.2.2/reports-by-model-test-name.yaml b/reports/automations/2026.2.2/reports-by-model-test-name.yaml new file mode 100644 index 000000000..02ad18108 --- /dev/null +++ b/reports/automations/2026.2.2/reports-by-model-test-name.yaml @@ -0,0 +1,62 @@ +--- +- model_id-task_name: gemma-3-27b-it-door_left_open-test_blueprint_inputs + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- model_id-task_name: gemma-3-27b-it-door_left_open-test_door_open_plays_media + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- model_id-task_name: gemma-3-27b-it-humidity_fan-test_blueprint_inputs + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- model_id-task_name: gemma-3-27b-it-humidity_fan-test_fan_triggered_off + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- model_id-task_name: gemma-3-27b-it-humidity_fan-test_fan_triggered_on + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- model_id-task_name: gemma-3-27b-it-light_on_door-test_blueprint_inputs + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- model_id-task_name: gemma-3-27b-it-light_on_door-test_door_open + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- model_id-task_name: gemma-3-27b-it-light_on_door-test_door_open_close + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- model_id-task_name: gemma-3-27b-it-light_on_door-test_light_timeout + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- model_id-task_name: gemma-3-27b-it-vacuum_pause-test_blueprint_inputs + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- model_id-task_name: gemma-3-27b-it-vacuum_pause-test_vacuum_not_changed_when_idle + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- model_id-task_name: gemma-3-27b-it-vacuum_pause-test_vacuum_running_and_paused + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 + diff --git a/reports/automations/2026.2.2/reports-by-task-id.yaml b/reports/automations/2026.2.2/reports-by-task-id.yaml new file mode 100644 index 000000000..968df6e41 --- /dev/null +++ b/reports/automations/2026.2.2/reports-by-task-id.yaml @@ -0,0 +1,102 @@ +--- +- task_id: door_left_open_door_left_open-door_left_open-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 2 +- task_id: door_left_open_door_left_open-door_left_open-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 2 +- task_id: door_left_open_door_left_open-door_left_open-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 2 +- task_id: door_left_open_door_left_open-door_left_open-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 2 +- task_id: door_left_open_door_left_open-door_left_open-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 2 +- task_id: humidity_fan_humidity_fan-humidity_fan-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 3 +- task_id: humidity_fan_humidity_fan-humidity_fan-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 3 +- task_id: humidity_fan_humidity_fan-humidity_fan-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 3 +- task_id: humidity_fan_humidity_fan-humidity_fan-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 3 +- task_id: humidity_fan_humidity_fan-humidity_fan-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 3 +- task_id: light_on_door_light_on_door-light_on_door-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 4 +- task_id: light_on_door_light_on_door-light_on_door-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 4 +- task_id: light_on_door_light_on_door-light_on_door-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 4 +- task_id: light_on_door_light_on_door-light_on_door-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 4 +- task_id: light_on_door_light_on_door-light_on_door-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 4 +- task_id: vacuum_pause_vacuum_pause-vacuum_pause-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 3 +- task_id: vacuum_pause_vacuum_pause-vacuum_pause-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 3 +- task_id: vacuum_pause_vacuum_pause-vacuum_pause-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 3 +- task_id: vacuum_pause_vacuum_pause-vacuum_pause-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 3 +- task_id: vacuum_pause_vacuum_pause-vacuum_pause-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 3 + diff --git a/reports/automations/2026.2.2/reports-by-test-name.yaml b/reports/automations/2026.2.2/reports-by-test-name.yaml new file mode 100644 index 000000000..2155013b0 --- /dev/null +++ b/reports/automations/2026.2.2/reports-by-test-name.yaml @@ -0,0 +1,62 @@ +--- +- task_name: door_left_open-test_blueprint_inputs + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- task_name: door_left_open-test_door_open_plays_media + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- task_name: humidity_fan-test_blueprint_inputs + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- task_name: humidity_fan-test_fan_triggered_off + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- task_name: humidity_fan-test_fan_triggered_on + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- task_name: light_on_door-test_blueprint_inputs + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- task_name: light_on_door-test_door_open + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- task_name: light_on_door-test_door_open_close + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- task_name: light_on_door-test_light_timeout + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- task_name: vacuum_pause-test_blueprint_inputs + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- task_name: vacuum_pause-test_vacuum_not_changed_when_idle + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 +- task_name: vacuum_pause-test_vacuum_running_and_paused + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 5 + diff --git a/reports/automations/2026.2.2/reports-token-stats.yaml b/reports/automations/2026.2.2/reports-token-stats.yaml new file mode 100644 index 000000000..63b69c89f --- /dev/null +++ b/reports/automations/2026.2.2/reports-token-stats.yaml @@ -0,0 +1,2 @@ +--- [] + diff --git a/reports/automations/2026.2.2/reports.yaml b/reports/automations/2026.2.2/reports.yaml new file mode 100644 index 000000000..d9bba51c6 --- /dev/null +++ b/reports/automations/2026.2.2/reports.yaml @@ -0,0 +1,7 @@ +--- +- model_id: gemma-3-27b-it + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 60 + diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/_scrape_context.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/_scrape_context.yaml new file mode 100644 index 000000000..ba7ac71f0 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/_scrape_context.yaml @@ -0,0 +1,19 @@ +--- +uuid: b33c321b-3e8f-4d7f-8e2a-9da8e6c0a08d +timestamp: 2026-02-16 08:07:46.294464 +scrape_config: + dataset: questions + dataset_path: datasets/questions + dataset_version: null + model_id: gemma-3-27b-it + model_output_path: reports/questions/2026.2.2 +version: 2026.2.2 +context: + user: runner + argv: + - /home/runner/work/openrouter-benchmarks/openrouter-benchmarks/.venv/bin/pytest + - home_assistant_datasets/tool/assist/collect + - --models=gemma-3-27b-it + - --dataset=datasets/questions/ + - --model_output_dir=reports/questions/2026.2.2 +notes: '' diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-0.yaml new file mode 100644 index 000000000..0601006ed --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-0.yaml @@ -0,0 +1,285 @@ +--- +uuid: 23ec693e-971b-43c7-bb84-cdaa55452524 +task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-0 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: If the temperature in the guest room is above 23 then set the target + temperature to 22 + expect_changes: + climate.guest_room_thermostat: + state: null + attributes: + temperature: 22.0 +response: Error talking to API +context: + unexpected_states: + climate.guest_room_thermostat: + expected: + temperature: 22.0 + got: + temperature: 26 + conversation_trace: + - event_type: async_process + data: + text: If the temperature in the guest room is above 23 then set the target temperature + to 22 + context: + id: 01KHJQVJ3FSN9XB9V2P737ZQW7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:48.079178+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:48.081080+00:00 + - role: user + content: If the temperature in the guest room is above 23 then set the target + temperature to 22 + attachments: null + created: 2026-02-16 08:07:48.079281+00:00 + tools: + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f42dd0c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:48.081094+00:00 + duration_ms: 170.848 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-1.yaml new file mode 100644 index 000000000..c2da7df9b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-1.yaml @@ -0,0 +1,285 @@ +--- +uuid: 786ac14b-ab2b-493c-8367-e8ab62b72854 +task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-1 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: If the temperature in the guest room is above 23 then set the target + temperature to 22 + expect_changes: + climate.guest_room_thermostat: + state: null + attributes: + temperature: 22.0 +response: Error talking to API +context: + unexpected_states: + climate.guest_room_thermostat: + expected: + temperature: 22.0 + got: + temperature: 26 + conversation_trace: + - event_type: async_process + data: + text: If the temperature in the guest room is above 23 then set the target temperature + to 22 + context: + id: 01KHJQVJF99DT3FK56899ADSWT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:48.457928+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:48.459876+00:00 + - role: user + content: If the temperature in the guest room is above 23 then set the target + temperature to 22 + attachments: null + created: 2026-02-16 08:07:48.458012+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f63afab0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:48.459889+00:00 + duration_ms: 168.851 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-2.yaml new file mode 100644 index 000000000..7afd37b29 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-2.yaml @@ -0,0 +1,285 @@ +--- +uuid: ee4c4b25-ab65-445e-9757-1d312062b99f +task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-2 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: If the temperature in the guest room is above 23 then set the target + temperature to 22 + expect_changes: + climate.guest_room_thermostat: + state: null + attributes: + temperature: 22.0 +response: Error talking to API +context: + unexpected_states: + climate.guest_room_thermostat: + expected: + temperature: 22.0 + got: + temperature: 26 + conversation_trace: + - event_type: async_process + data: + text: If the temperature in the guest room is above 23 then set the target temperature + to 22 + context: + id: 01KHJQVJVC2JN6CQ81REXQEFRY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:48.845002+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:48.847726+00:00 + - role: user + content: If the temperature in the guest room is above 23 then set the target + temperature to 22 + attachments: null + created: 2026-02-16 08:07:48.845074+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f43614e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:48.847736+00:00 + duration_ms: 167.171 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-3.yaml new file mode 100644 index 000000000..f41245881 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-3.yaml @@ -0,0 +1,285 @@ +--- +uuid: 805dbbcd-552a-4c2b-a1e7-2752a95ad6f9 +task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-3 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: If the temperature in the guest room is above 23 then set the target + temperature to 22 + expect_changes: + climate.guest_room_thermostat: + state: null + attributes: + temperature: 22.0 +response: Error talking to API +context: + unexpected_states: + climate.guest_room_thermostat: + expected: + temperature: 22.0 + got: + temperature: 26 + conversation_trace: + - event_type: async_process + data: + text: If the temperature in the guest room is above 23 then set the target temperature + to 22 + context: + id: 01KHJQVK8B1DY1Z0GXBPKWZE0Z + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:49.259707+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:49.261594+00:00 + - role: user + content: If the temperature in the guest room is above 23 then set the target + temperature to 22 + attachments: null + created: 2026-02-16 08:07:49.259778+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f50cc040>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:49.261604+00:00 + duration_ms: 176.124 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-4.yaml new file mode 100644 index 000000000..50b5b1c5c --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-4.yaml @@ -0,0 +1,285 @@ +--- +uuid: 9779319a-ac9a-4223-98d1-8ca3bb8472fa +task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-4 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: If the temperature in the guest room is above 23 then set the target + temperature to 22 + expect_changes: + climate.guest_room_thermostat: + state: null + attributes: + temperature: 22.0 +response: Error talking to API +context: + unexpected_states: + climate.guest_room_thermostat: + expected: + temperature: 22.0 + got: + temperature: 26 + conversation_trace: + - event_type: async_process + data: + text: If the temperature in the guest room is above 23 then set the target temperature + to 22 + context: + id: 01KHJQVKMRFE72Y33GB2Q4M89S + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:49.656823+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:49.658633+00:00 + - role: user + content: If the temperature in the guest room is above 23 then set the target + temperature to 22 + attachments: null + created: 2026-02-16 08:07:49.656892+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4e5b1c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:49.658643+00:00 + duration_ms: 175.153 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-5.yaml new file mode 100644 index 000000000..2dbd63cfc --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-5.yaml @@ -0,0 +1,285 @@ +--- +uuid: d4ef80f9-5e77-4c7e-b080-80b106aecf70 +task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-5 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: If the temperature in the guest room is above 23 then set the target + temperature to 22 + expect_changes: + climate.guest_room_thermostat: + state: null + attributes: + temperature: 22.0 +response: Error talking to API +context: + unexpected_states: + climate.guest_room_thermostat: + expected: + temperature: 22.0 + got: + temperature: 26 + conversation_trace: + - event_type: async_process + data: + text: If the temperature in the guest room is above 23 then set the target temperature + to 22 + context: + id: 01KHJQVM103EXNVJ3AGH16C0FD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:50.048417+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:50.053906+00:00 + - role: user + content: If the temperature in the guest room is above 23 then set the target + temperature to 22 + attachments: null + created: 2026-02-16 08:07:50.048487+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4edfcc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:50.053918+00:00 + duration_ms: 238.529 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-6.yaml new file mode 100644 index 000000000..70c425692 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-6.yaml @@ -0,0 +1,285 @@ +--- +uuid: a8ab0aa2-7242-4320-90e5-ca6c8990737d +task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-6 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: If the temperature in the guest room is above 23 then set the target + temperature to 22 + expect_changes: + climate.guest_room_thermostat: + state: null + attributes: + temperature: 22.0 +response: Error talking to API +context: + unexpected_states: + climate.guest_room_thermostat: + expected: + temperature: 22.0 + got: + temperature: 26 + conversation_trace: + - event_type: async_process + data: + text: If the temperature in the guest room is above 23 then set the target temperature + to 22 + context: + id: 01KHJQVMF66KYGRCZH8DKC83CX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:50.502585+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:50.505526+00:00 + - role: user + content: If the temperature in the guest room is above 23 then set the target + temperature to 22 + attachments: null + created: 2026-02-16 08:07:50.502653+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f63a2f00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:50.505539+00:00 + duration_ms: 205.213 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-7.yaml new file mode 100644 index 000000000..fb8fa5f1a --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-7.yaml @@ -0,0 +1,285 @@ +--- +uuid: c2247af4-ac7c-4c79-b3ce-490a5b1c6c9c +task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-7 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: If the temperature in the guest room is above 23 then set the target + temperature to 22 + expect_changes: + climate.guest_room_thermostat: + state: null + attributes: + temperature: 22.0 +response: Error talking to API +context: + unexpected_states: + climate.guest_room_thermostat: + expected: + temperature: 22.0 + got: + temperature: 26 + conversation_trace: + - event_type: async_process + data: + text: If the temperature in the guest room is above 23 then set the target temperature + to 22 + context: + id: 01KHJQVMWF5T8W0ZEYWEV5ZWFJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:50.927982+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:50.929805+00:00 + - role: user + content: If the temperature in the guest room is above 23 then set the target + temperature to 22 + attachments: null + created: 2026-02-16 08:07:50.928051+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4bca350>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:50.929815+00:00 + duration_ms: 169.626 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-8.yaml new file mode 100644 index 000000000..21b882806 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-8.yaml @@ -0,0 +1,285 @@ +--- +uuid: 3a52e36f-975a-414b-878b-4f41b806cc5e +task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-8 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: If the temperature in the guest room is above 23 then set the target + temperature to 22 + expect_changes: + climate.guest_room_thermostat: + state: null + attributes: + temperature: 22.0 +response: Error talking to API +context: + unexpected_states: + climate.guest_room_thermostat: + expected: + temperature: 22.0 + got: + temperature: 26 + conversation_trace: + - event_type: async_process + data: + text: If the temperature in the guest room is above 23 then set the target temperature + to 22 + context: + id: 01KHJQVN9F24WYARB7ZB10SGV8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:51.343180+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:51.347322+00:00 + - role: user + content: If the temperature in the guest room is above 23 then set the target + temperature to 22 + attachments: null + created: 2026-02-16 08:07:51.343273+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4ae9900>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:51.347333+00:00 + duration_ms: 191.565 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-9.yaml new file mode 100644 index 000000000..89e02b8ce --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-9.yaml @@ -0,0 +1,285 @@ +--- +uuid: fa8adc06-9a77-4bae-af3b-01ad1fee258e +task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-9 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: If the temperature in the guest room is above 23 then set the target + temperature to 22 + expect_changes: + climate.guest_room_thermostat: + state: null + attributes: + temperature: 22.0 +response: Error talking to API +context: + unexpected_states: + climate.guest_room_thermostat: + expected: + temperature: 22.0 + got: + temperature: 26 + conversation_trace: + - event_type: async_process + data: + text: If the temperature in the guest room is above 23 then set the target temperature + to 22 + context: + id: 01KHJQVNPM60Q88CK55HT36J3P + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:51.764628+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:51.766348+00:00 + - role: user + content: If the temperature in the guest room is above 23 then set the target + temperature to 22 + attachments: null + created: 2026-02-16 08:07:51.764699+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4e6bcc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:51.766359+00:00 + duration_ms: 169.688 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-0.yaml new file mode 100644 index 000000000..eab1d537b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-0.yaml @@ -0,0 +1,277 @@ +--- +uuid: af24064b-dd4d-48fb-8e8f-a8f8a9486468 +task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-0 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: Set the target temperature to 22 in the guest room if it is above 23 + expect_changes: + climate.guest_room_thermostat: + state: cool + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Set the target temperature to 22 in the guest room if it is above 23 + context: + id: 01KHJQVP3RWK8Q42SCRGN3CM5N + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:52.184847+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:52.186719+00:00 + - role: user + content: Set the target temperature to 22 in the guest room if it is above + 23 + attachments: null + created: 2026-02-16 08:07:52.184918+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4eddd20>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:52.186729+00:00 + duration_ms: 181.86 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-1.yaml new file mode 100644 index 000000000..7d2bde655 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-1.yaml @@ -0,0 +1,277 @@ +--- +uuid: 6468112a-96f5-4aad-bbb3-53469dd73e69 +task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-1 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: Set the target temperature to 22 in the guest room if it is above 23 + expect_changes: + climate.guest_room_thermostat: + state: cool + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Set the target temperature to 22 in the guest room if it is above 23 + context: + id: 01KHJQVPGMWSFMXMZF6SK9JP0N + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:52.596183+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:52.598061+00:00 + - role: user + content: Set the target temperature to 22 in the guest room if it is above + 23 + attachments: null + created: 2026-02-16 08:07:52.596292+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f46310c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:52.598071+00:00 + duration_ms: 169.487 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-2.yaml new file mode 100644 index 000000000..77db85365 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-2.yaml @@ -0,0 +1,277 @@ +--- +uuid: c3a15e1e-9012-4d70-867f-68799e72ec96 +task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-2 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: Set the target temperature to 22 in the guest room if it is above 23 + expect_changes: + climate.guest_room_thermostat: + state: cool + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Set the target temperature to 22 in the guest room if it is above 23 + context: + id: 01KHJQVQ64TSH0A5SQVB848CZ2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:53.284329+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:53.290452+00:00 + - role: user + content: Set the target temperature to 22 in the guest room if it is above + 23 + attachments: null + created: 2026-02-16 08:07:53.284400+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4daa820>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:53.290464+00:00 + duration_ms: 177.505 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-3.yaml new file mode 100644 index 000000000..a9a91036a --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-3.yaml @@ -0,0 +1,277 @@ +--- +uuid: 93614663-68c7-4ae2-8be4-e9fb1c7fdd9b +task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-3 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: Set the target temperature to 22 in the guest room if it is above 23 + expect_changes: + climate.guest_room_thermostat: + state: cool + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Set the target temperature to 22 in the guest room if it is above 23 + context: + id: 01KHJQVQJPTC72MERM7NPGTHQB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:53.686346+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:53.689506+00:00 + - role: user + content: Set the target temperature to 22 in the guest room if it is above + 23 + attachments: null + created: 2026-02-16 08:07:53.686421+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f484d7a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:53.689518+00:00 + duration_ms: 24.027 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-4.yaml new file mode 100644 index 000000000..21613b987 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-4.yaml @@ -0,0 +1,277 @@ +--- +uuid: eae32d92-ac79-489d-877c-c05265a423aa +task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-4 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: Set the target temperature to 22 in the guest room if it is above 23 + expect_changes: + climate.guest_room_thermostat: + state: cool + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Set the target temperature to 22 in the guest room if it is above 23 + context: + id: 01KHJQVQT6JRDP8W37PAAV6H9E + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:53.926626+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:53.947735+00:00 + - role: user + content: Set the target temperature to 22 in the guest room if it is above + 23 + attachments: null + created: 2026-02-16 08:07:53.926695+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f61ca1f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:53.947751+00:00 + duration_ms: 44.129 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-5.yaml new file mode 100644 index 000000000..d8cf95a3b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-5.yaml @@ -0,0 +1,277 @@ +--- +uuid: 85001b52-3901-4967-b914-29b6a48bf830 +task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-5 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: Set the target temperature to 22 in the guest room if it is above 23 + expect_changes: + climate.guest_room_thermostat: + state: cool + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Set the target temperature to 22 in the guest room if it is above 23 + context: + id: 01KHJQVR3AB1X41RT68H57DSDJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:54.218721+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:54.220596+00:00 + - role: user + content: Set the target temperature to 22 in the guest room if it is above + 23 + attachments: null + created: 2026-02-16 08:07:54.218794+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4f6bb60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:54.220606+00:00 + duration_ms: 183.011 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-6.yaml new file mode 100644 index 000000000..a6dd1fa53 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-6.yaml @@ -0,0 +1,277 @@ +--- +uuid: 2b211923-97ce-4450-9f6d-6ae032495131 +task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-6 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: Set the target temperature to 22 in the guest room if it is above 23 + expect_changes: + climate.guest_room_thermostat: + state: cool + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Set the target temperature to 22 in the guest room if it is above 23 + context: + id: 01KHJQVRFR6BAVR8DEFBDE9BG5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:54.616851+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:54.618682+00:00 + - role: user + content: Set the target temperature to 22 in the guest room if it is above + 23 + attachments: null + created: 2026-02-16 08:07:54.616921+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f44e2da0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:54.618692+00:00 + duration_ms: 171.363 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-7.yaml new file mode 100644 index 000000000..01dc6e976 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-7.yaml @@ -0,0 +1,277 @@ +--- +uuid: ff42c4fa-b506-4d49-9386-ce148de22e70 +task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-7 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: Set the target temperature to 22 in the guest room if it is above 23 + expect_changes: + climate.guest_room_thermostat: + state: cool + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Set the target temperature to 22 in the guest room if it is above 23 + context: + id: 01KHJQVRVX1XJDZ015H4P2FX0K + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:55.005873+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:55.007750+00:00 + - role: user + content: Set the target temperature to 22 in the guest room if it is above + 23 + attachments: null + created: 2026-02-16 08:07:55.005944+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4b64b40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:55.007760+00:00 + duration_ms: 24.985 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-8.yaml new file mode 100644 index 000000000..848ddc06f --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-8.yaml @@ -0,0 +1,277 @@ +--- +uuid: 9c9a9388-ede6-4d27-aecd-11b230f730a3 +task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-8 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: Set the target temperature to 22 in the guest room if it is above 23 + expect_changes: + climate.guest_room_thermostat: + state: cool + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Set the target temperature to 22 in the guest room if it is above 23 + context: + id: 01KHJQVS43YNS9ZCE73GCY2PJ9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:55.267187+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:55.269049+00:00 + - role: user + content: Set the target temperature to 22 in the guest room if it is above + 23 + attachments: null + created: 2026-02-16 08:07:55.267291+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f50bb950>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:55.269059+00:00 + duration_ms: 175.072 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-9.yaml new file mode 100644 index 000000000..aae6c7232 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-9.yaml @@ -0,0 +1,277 @@ +--- +uuid: fbed3395-847e-435e-9241-d32f06917c7f +task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-9 +model_id: gemma-3-27b-it +category: +- climate +- question +task: + input_text: Set the target temperature to 22 in the guest room if it is above 23 + expect_changes: + climate.guest_room_thermostat: + state: cool + attributes: null +response: Error talking to API +context: + unexpected_states: {} + conversation_trace: + - event_type: async_process + data: + text: Set the target temperature to 22 in the guest room if it is above 23 + context: + id: 01KHJQVSH3932BB5AP67WGPC1F + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:55.683924+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:55.685856+00:00 + - role: user + content: Set the target temperature to 22 in the guest room if it is above + 23 + attachments: null + created: 2026-02-16 08:07:55.683997+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4bc9170>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:55.685866+00:00 + duration_ms: 167.928 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-0.yaml new file mode 100644 index 000000000..a6af931b8 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-0.yaml @@ -0,0 +1,274 @@ +--- +uuid: 967a6b11-1ce4-434a-8d15-6c0d3e65ee7b +task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-0 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: Is the kitchen sink battery low? + expect_response: + - 'no' + - '95' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen sink battery low? + context: + id: 01KHJQVWHGR0YM50Z7NJ2EYFAP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:58.768618+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:58.770574+00:00 + - role: user + content: Is the kitchen sink battery low? + attachments: null + created: 2026-02-16 08:07:58.768708+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d429c0f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:58.770584+00:00 + duration_ms: 23.789 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-1.yaml new file mode 100644 index 000000000..86b573579 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-1.yaml @@ -0,0 +1,274 @@ +--- +uuid: c489f3e5-1e91-46c0-b30d-5c34809f3623 +task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-1 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: Is the kitchen sink battery low? + expect_response: + - 'no' + - '95' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen sink battery low? + context: + id: 01KHJQVWRTM4H6X0Q72CQRVC43 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:59.002144+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:59.004298+00:00 + - role: user + content: Is the kitchen sink battery low? + attachments: null + created: 2026-02-16 08:07:59.002212+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4998040>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:59.004308+00:00 + duration_ms: 179.753 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-2.yaml new file mode 100644 index 000000000..108c28e50 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-2.yaml @@ -0,0 +1,274 @@ +--- +uuid: 8f1ced7a-d775-4bd9-8a2c-c50f195e5481 +task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-2 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: Is the kitchen sink battery low? + expect_response: + - 'no' + - '95' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen sink battery low? + context: + id: 01KHJQVX4YRW7MMPE5FP48RW7Y + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:59.390888+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:59.392775+00:00 + - role: user + content: Is the kitchen sink battery low? + attachments: null + created: 2026-02-16 08:07:59.390956+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f48bfb60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:59.392784+00:00 + duration_ms: 24.887 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-3.yaml new file mode 100644 index 000000000..44336bbe1 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-3.yaml @@ -0,0 +1,274 @@ +--- +uuid: ecd24d9e-f3fd-41e6-a391-4c79301343d7 +task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-3 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: Is the kitchen sink battery low? + expect_response: + - 'no' + - '95' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen sink battery low? + context: + id: 01KHJQVXCQR654Q3X5T80CWFH2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:59.639405+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:59.641153+00:00 + - role: user + content: Is the kitchen sink battery low? + attachments: null + created: 2026-02-16 08:07:59.639474+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f46f4eb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:59.641162+00:00 + duration_ms: 22.879 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-4.yaml new file mode 100644 index 000000000..cda83c601 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-4.yaml @@ -0,0 +1,274 @@ +--- +uuid: 1069f5f4-061f-42eb-96aa-b3a9b9a274b4 +task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-4 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: Is the kitchen sink battery low? + expect_response: + - 'no' + - '95' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen sink battery low? + context: + id: 01KHJQVXNM3934EGK5XNYCHZ36 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:07:59.924334+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:07:59.926186+00:00 + - role: user + content: Is the kitchen sink battery low? + attachments: null + created: 2026-02-16 08:07:59.924409+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f61631c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:07:59.926196+00:00 + duration_ms: 32.702 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-5.yaml new file mode 100644 index 000000000..1894eed0d --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-5.yaml @@ -0,0 +1,274 @@ +--- +uuid: 35767f97-fb09-48be-a9a9-3b8a4dfb3170 +task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-5 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: Is the kitchen sink battery low? + expect_response: + - 'no' + - '95' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen sink battery low? + context: + id: 01KHJQVXX7Y66HRN1CVYA1JRMC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:00.167182+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:00.169088+00:00 + - role: user + content: Is the kitchen sink battery low? + attachments: null + created: 2026-02-16 08:08:00.167279+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f61db320>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:00.169097+00:00 + duration_ms: 27.503 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-6.yaml new file mode 100644 index 000000000..45079d452 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-6.yaml @@ -0,0 +1,274 @@ +--- +uuid: d1b2ceac-b8ba-47d8-b909-d357cfc56d70 +task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-6 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: Is the kitchen sink battery low? + expect_response: + - 'no' + - '95' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen sink battery low? + context: + id: 01KHJQVY46M5E35QCFEPT2Y5S7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:00.390767+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:00.392626+00:00 + - role: user + content: Is the kitchen sink battery low? + attachments: null + created: 2026-02-16 08:08:00.390837+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1f21220>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:00.392636+00:00 + duration_ms: 566.137 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-7.yaml new file mode 100644 index 000000000..0da6d1ed7 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-7.yaml @@ -0,0 +1,274 @@ +--- +uuid: a76f47fb-84d4-4a1d-ae49-44bf00e576d4 +task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-7 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: Is the kitchen sink battery low? + expect_response: + - 'no' + - '95' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen sink battery low? + context: + id: 01KHJQVYWPDGSGATQK7Q1D45MH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:01.174532+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:01.176375+00:00 + - role: user + content: Is the kitchen sink battery low? + attachments: null + created: 2026-02-16 08:08:01.174601+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4baecf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:01.176385+00:00 + duration_ms: 177.444 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-8.yaml new file mode 100644 index 000000000..5e5e2aef9 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-8.yaml @@ -0,0 +1,274 @@ +--- +uuid: 97230755-8925-4b1c-b9bc-443e221a558a +task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-8 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: Is the kitchen sink battery low? + expect_response: + - 'no' + - '95' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen sink battery low? + context: + id: 01KHJQVZ938H5E6ZD7C0QSYR8B + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:01.571943+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:01.574163+00:00 + - role: user + content: Is the kitchen sink battery low? + attachments: null + created: 2026-02-16 08:08:01.572014+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f63db740>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:01.574173+00:00 + duration_ms: 26.664 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-9.yaml new file mode 100644 index 000000000..4cf0ad354 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-9.yaml @@ -0,0 +1,274 @@ +--- +uuid: 1a8e23b2-2027-443b-b8b8-3b9a6b277bc3 +task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-9 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: Is the kitchen sink battery low? + expect_response: + - 'no' + - '95' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen sink battery low? + context: + id: 01KHJQVZGTC3YD851HM8EGSE0X + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:01.818986+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:01.820862+00:00 + - role: user + content: Is the kitchen sink battery low? + attachments: null + created: 2026-02-16 08:08:01.819063+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4d22e50>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:01.820873+00:00 + duration_ms: 25.22 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-0.yaml new file mode 100644 index 000000000..abbe9125d --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-0.yaml @@ -0,0 +1,274 @@ +--- +uuid: e138a545-371a-4406-bdb6-00568f9ca7d8 +task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-0 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the kitchen plug on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen plug on? + context: + id: 01KHJQVZRMCF6KXETYYEQWVQF3 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:02.068977+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:02.072196+00:00 + - role: user + content: Is the kitchen plug on? + attachments: null + created: 2026-02-16 08:08:02.069047+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4e217a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:02.072207+00:00 + duration_ms: 32.957 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-1.yaml new file mode 100644 index 000000000..2c20e2278 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-1.yaml @@ -0,0 +1,274 @@ +--- +uuid: b8aa082e-f01a-45af-9182-1813585bd485 +task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-1 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the kitchen plug on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen plug on? + context: + id: 01KHJQW01041WCK4XGH429JH1J + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:02.336330+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:02.339559+00:00 + - role: user + content: Is the kitchen plug on? + attachments: null + created: 2026-02-16 08:08:02.336401+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d4199bc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:02.339569+00:00 + duration_ms: 25.304 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-2.yaml new file mode 100644 index 000000000..6ae223a33 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-2.yaml @@ -0,0 +1,274 @@ +--- +uuid: 07da4caf-0432-4840-aa99-fe949cc1b860 +task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-2 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the kitchen plug on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen plug on? + context: + id: 01KHJQW08GKNYW9MSHYN205E9T + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:02.576881+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:02.578804+00:00 + - role: user + content: Is the kitchen plug on? + attachments: null + created: 2026-02-16 08:08:02.576949+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f45e4880>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:02.578813+00:00 + duration_ms: 25.206 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-3.yaml new file mode 100644 index 000000000..288446264 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-3.yaml @@ -0,0 +1,274 @@ +--- +uuid: 8a582096-9fc2-47d1-8987-1f50f7cadb00 +task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-3 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the kitchen plug on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen plug on? + context: + id: 01KHJQW0FY05QZM105YH1GK9BD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:02.814179+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:02.818455+00:00 + - role: user + content: Is the kitchen plug on? + attachments: null + created: 2026-02-16 08:08:02.814275+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1ed16f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:02.818467+00:00 + duration_ms: 27.161 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-4.yaml new file mode 100644 index 000000000..4a3eaf3a2 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-4.yaml @@ -0,0 +1,274 @@ +--- +uuid: 633819dc-7c44-43bd-91d6-b5c1387037d2 +task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-4 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the kitchen plug on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen plug on? + context: + id: 01KHJQW0R42ZS3JB9CC8M7D33W + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:03.076722+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:03.078473+00:00 + - role: user + content: Is the kitchen plug on? + attachments: null + created: 2026-02-16 08:08:03.076789+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4629a60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:03.078484+00:00 + duration_ms: 28.068 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-5.yaml new file mode 100644 index 000000000..dc01875ce --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-5.yaml @@ -0,0 +1,274 @@ +--- +uuid: dbfe937c-23db-4efa-8799-ae64248568d7 +task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-5 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the kitchen plug on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen plug on? + context: + id: 01KHJQW1APDYD7J3VEVVFF1707 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:03.670991+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:03.673445+00:00 + - role: user + content: Is the kitchen plug on? + attachments: null + created: 2026-02-16 08:08:03.671079+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c0e3e6c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:03.673457+00:00 + duration_ms: 30.274 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-6.yaml new file mode 100644 index 000000000..dc6256e36 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-6.yaml @@ -0,0 +1,274 @@ +--- +uuid: 17edf52b-bd60-42a4-a565-7f61df2650e8 +task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-6 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the kitchen plug on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen plug on? + context: + id: 01KHJQW1JW3SVD55JV8DPWNAY6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:03.932340+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:03.936298+00:00 + - role: user + content: Is the kitchen plug on? + attachments: null + created: 2026-02-16 08:08:03.932438+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d43428d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:03.936311+00:00 + duration_ms: 32.172 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-7.yaml new file mode 100644 index 000000000..04b45ece8 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-7.yaml @@ -0,0 +1,274 @@ +--- +uuid: 1569fde2-c069-4a59-9c56-ec1c2201ee47 +task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-7 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the kitchen plug on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen plug on? + context: + id: 01KHJQW1WBWWQ39Z4BSV8CXM2B + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:04.235735+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:04.237659+00:00 + - role: user + content: Is the kitchen plug on? + attachments: null + created: 2026-02-16 08:08:04.235807+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f63c7740>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:04.237669+00:00 + duration_ms: 34.23 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-8.yaml new file mode 100644 index 000000000..e121133fd --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-8.yaml @@ -0,0 +1,274 @@ +--- +uuid: ea3f9595-f883-4b6e-b7fc-801be6c7fcf2 +task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-8 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the kitchen plug on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen plug on? + context: + id: 01KHJQW24N4PHE7M379W8MJWDT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:04.501443+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:04.503383+00:00 + - role: user + content: Is the kitchen plug on? + attachments: null + created: 2026-02-16 08:08:04.501514+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4944d50>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:04.503393+00:00 + duration_ms: 33.483 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-9.yaml new file mode 100644 index 000000000..eaec99014 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_kitchen_plug_on-9.yaml @@ -0,0 +1,274 @@ +--- +uuid: 8a5ed8f5-75b2-41ae-ab6c-45ed5d9b66e0 +task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-9 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the kitchen plug on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the kitchen plug on? + context: + id: 01KHJQW2C8GTDFX3SWW3M2WR13 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:04.744450+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:04.748737+00:00 + - role: user + content: Is the kitchen plug on? + attachments: null + created: 2026-02-16 08:08:04.744520+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d43412d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:04.748748+00:00 + duration_ms: 26.234 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-0.yaml new file mode 100644 index 000000000..737afc79c --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-0.yaml @@ -0,0 +1,274 @@ +--- +uuid: 74b8c46c-18d5-41ab-b14a-a123e76388b6 +task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-0 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the plug in the kitchen on? + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the plug in the kitchen on? + context: + id: 01KHJQW2KRKF4ZC975DBAX1Q7Q + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:04.984162+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:04.990704+00:00 + - role: user + content: Is the plug in the kitchen on? + attachments: null + created: 2026-02-16 08:08:04.984232+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1d60f60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:04.990714+00:00 + duration_ms: 28.512 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-1.yaml new file mode 100644 index 000000000..4c7ce4d0b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-1.yaml @@ -0,0 +1,274 @@ +--- +uuid: 36e607ef-db10-4202-be79-0e30560a6cb8 +task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-1 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the plug in the kitchen on? + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the plug in the kitchen on? + context: + id: 01KHJQW2TXKQY7S3XTRZ4D9X0B + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:05.213845+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:05.216966+00:00 + - role: user + content: Is the plug in the kitchen on? + attachments: null + created: 2026-02-16 08:08:05.213915+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c0ef43b0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:05.216976+00:00 + duration_ms: 24.959 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-2.yaml new file mode 100644 index 000000000..04db2382f --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-2.yaml @@ -0,0 +1,274 @@ +--- +uuid: 3f4e5696-db55-468b-99cb-477b58a5a4f8 +task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-2 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the plug in the kitchen on? + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the plug in the kitchen on? + context: + id: 01KHJQW32K1NKH1QKWN4959D8M + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:05.459730+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:05.462766+00:00 + - role: user + content: Is the plug in the kitchen on? + attachments: null + created: 2026-02-16 08:08:05.459803+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4fd9dd0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:05.462776+00:00 + duration_ms: 35.814 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-3.yaml new file mode 100644 index 000000000..e9bf2633a --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-3.yaml @@ -0,0 +1,274 @@ +--- +uuid: e87b7e45-a874-4ae6-ac5d-f9297e82c222 +task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-3 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the plug in the kitchen on? + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the plug in the kitchen on? + context: + id: 01KHJQW3BTKWB6YNKQY8C5AACN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:05.754467+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:05.756316+00:00 + - role: user + content: Is the plug in the kitchen on? + attachments: null + created: 2026-02-16 08:08:05.754537+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f46f6a30>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:05.756326+00:00 + duration_ms: 23.797 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-4.yaml new file mode 100644 index 000000000..223848508 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-4.yaml @@ -0,0 +1,274 @@ +--- +uuid: cf579a36-ee8a-42d7-bd73-36db376206c3 +task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-4 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the plug in the kitchen on? + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the plug in the kitchen on? + context: + id: 01KHJQW42X0EFE0J7RXJCT06C6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:06.493824+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:06.498362+00:00 + - role: user + content: Is the plug in the kitchen on? + attachments: null + created: 2026-02-16 08:08:06.493894+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1e0f8a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:06.498372+00:00 + duration_ms: 30.957 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-5.yaml new file mode 100644 index 000000000..af9716713 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-5.yaml @@ -0,0 +1,274 @@ +--- +uuid: 5f4b071b-8a3d-4d8b-98f5-cd6a0bc6e4de +task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-5 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the plug in the kitchen on? + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the plug in the kitchen on? + context: + id: 01KHJQW4AHJHZYTZYQNG93AM6M + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:06.737327+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:06.739582+00:00 + - role: user + content: Is the plug in the kitchen on? + attachments: null + created: 2026-02-16 08:08:06.737398+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f484cf60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:06.739592+00:00 + duration_ms: 26.156 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-6.yaml new file mode 100644 index 000000000..5847a2f6e --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-6.yaml @@ -0,0 +1,274 @@ +--- +uuid: 430c2a3e-426b-4aa3-a2d7-fbf479784352 +task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-6 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the plug in the kitchen on? + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the plug in the kitchen on? + context: + id: 01KHJQW4JFZJDB2EQNXANYZ0ZY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:06.991337+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:06.994644+00:00 + - role: user + content: Is the plug in the kitchen on? + attachments: null + created: 2026-02-16 08:08:06.991407+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f47f2610>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:06.994655+00:00 + duration_ms: 31.149 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-7.yaml new file mode 100644 index 000000000..624cb6956 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-7.yaml @@ -0,0 +1,274 @@ +--- +uuid: c1fb2645-812e-4497-a362-906c582a06cb +task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-7 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the plug in the kitchen on? + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the plug in the kitchen on? + context: + id: 01KHJQW4TCE7ZGDEV30ERR5KG0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:07.244150+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:07.249118+00:00 + - role: user + content: Is the plug in the kitchen on? + attachments: null + created: 2026-02-16 08:08:07.244223+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f5065010>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:07.249129+00:00 + duration_ms: 26.699 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-8.yaml new file mode 100644 index 000000000..9d18c8fa6 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-8.yaml @@ -0,0 +1,274 @@ +--- +uuid: 5683c07d-d4dd-4199-b223-01f72e695ef0 +task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-8 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the plug in the kitchen on? + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the plug in the kitchen on? + context: + id: 01KHJQW527FZAY7VABJ1HBBAZT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:07.495765+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:07.497733+00:00 + - role: user + content: Is the plug in the kitchen on? + attachments: null + created: 2026-02-16 08:08:07.495838+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d43431c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:07.497742+00:00 + duration_ms: 27.764 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-9.yaml new file mode 100644 index 000000000..ef987d29a --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-9.yaml @@ -0,0 +1,274 @@ +--- +uuid: 613fa9e6-3905-4c20-a244-32eebf6c44cd +task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-9 +model_id: gemma-3-27b-it +category: +- switch +- question +task: + input_text: Is the plug in the kitchen on? + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the plug in the kitchen on? + context: + id: 01KHJQW5A0R5T65KE71ATBRNNX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:07.744135+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:07.746006+00:00 + - role: user + content: Is the plug in the kitchen on? + attachments: null + created: 2026-02-16 08:08:07.744203+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4e6ada0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:07.746016+00:00 + duration_ms: 26.951 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-0.yaml new file mode 100644 index 000000000..4cef504ba --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-0.yaml @@ -0,0 +1,275 @@ +--- +uuid: 49f59263-5042-40ce-b667-25815533b09a +task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-0 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Are the sprinklers on? + expect_response: + - 'yes' + - 'on' + - open +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are the sprinklers on? + context: + id: 01KHJQW5J1AQ1J1MYSJ1XQAVP8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:08.001929+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:08.004187+00:00 + - role: user + content: Are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:08.002001+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4f7b3d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:08.004197+00:00 + duration_ms: 31.607 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-1.yaml new file mode 100644 index 000000000..33f5c4fd9 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-1.yaml @@ -0,0 +1,275 @@ +--- +uuid: bd8779f1-b7b4-4400-94cc-2e9c7956fc19 +task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-1 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Are the sprinklers on? + expect_response: + - 'yes' + - 'on' + - open +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are the sprinklers on? + context: + id: 01KHJQW5T3CJJ4PXWYF3MQ52B4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:08.259972+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:08.266966+00:00 + - role: user + content: Are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:08.260044+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f46d31c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:08.266977+00:00 + duration_ms: 29.732 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-2.yaml new file mode 100644 index 000000000..fa53aa353 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-2.yaml @@ -0,0 +1,275 @@ +--- +uuid: 6c43403f-f4f8-4593-98ff-a009aea64c53 +task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-2 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Are the sprinklers on? + expect_response: + - 'yes' + - 'on' + - open +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are the sprinklers on? + context: + id: 01KHJQW61ZGWP1MCFNK6C5MKDW + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:08.511128+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:08.513404+00:00 + - role: user + content: Are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:08.511201+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4d34250>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:08.513414+00:00 + duration_ms: 31.526 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-3.yaml new file mode 100644 index 000000000..3db9de209 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-3.yaml @@ -0,0 +1,275 @@ +--- +uuid: 98ddbc35-19da-47b0-9340-cdeb18c809a5 +task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-3 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Are the sprinklers on? + expect_response: + - 'yes' + - 'on' + - open +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are the sprinklers on? + context: + id: 01KHJQW69SX37P9N486M3NZQAY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:08.761846+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:08.764192+00:00 + - role: user + content: Are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:08.761918+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4fa81a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:08.764203+00:00 + duration_ms: 24.388 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-4.yaml new file mode 100644 index 000000000..665a591da --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-4.yaml @@ -0,0 +1,275 @@ +--- +uuid: 8abf6ddd-3a5a-48c4-bf23-15f6ca9f3e7b +task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-4 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Are the sprinklers on? + expect_response: + - 'yes' + - 'on' + - open +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are the sprinklers on? + context: + id: 01KHJQW6J18JNKS2RGPQ0YVK1K + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:09.025438+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:09.027318+00:00 + - role: user + content: Are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:09.025510+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f50cbc10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:09.027328+00:00 + duration_ms: 24.861 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-5.yaml new file mode 100644 index 000000000..969546190 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-5.yaml @@ -0,0 +1,275 @@ +--- +uuid: e6a5f6d1-b78c-441b-ae11-01424cee692a +task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-5 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Are the sprinklers on? + expect_response: + - 'yes' + - 'on' + - open +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are the sprinklers on? + context: + id: 01KHJQW6T7GTFGVFY0RP7HPM2W + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:09.287149+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:09.289873+00:00 + - role: user + content: Are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:09.287263+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d4112cf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:09.289885+00:00 + duration_ms: 35.171 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-6.yaml new file mode 100644 index 000000000..afe5d3136 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-6.yaml @@ -0,0 +1,275 @@ +--- +uuid: feb7bba9-372d-4963-8ed7-e917649187f0 +task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-6 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Are the sprinklers on? + expect_response: + - 'yes' + - 'on' + - open +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are the sprinklers on? + context: + id: 01KHJQW7NEGZC0FDYRJ81KNH3T + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:10.158680+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:10.160451+00:00 + - role: user + content: Are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:10.158750+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f6163c10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:10.160460+00:00 + duration_ms: 34.603 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-7.yaml new file mode 100644 index 000000000..33c50415c --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-7.yaml @@ -0,0 +1,275 @@ +--- +uuid: f35d440a-6b02-4bbb-b538-f1cc70dd531b +task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-7 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Are the sprinklers on? + expect_response: + - 'yes' + - 'on' + - open +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are the sprinklers on? + context: + id: 01KHJQW7X8TGNP743FVET9Y4NC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:10.408609+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:10.410383+00:00 + - role: user + content: Are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:10.408678+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4689c70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:10.410393+00:00 + duration_ms: 34.601 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-8.yaml new file mode 100644 index 000000000..e1e791225 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-8.yaml @@ -0,0 +1,275 @@ +--- +uuid: 4d34dada-f009-41e6-81b8-e4db09922957 +task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-8 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Are the sprinklers on? + expect_response: + - 'yes' + - 'on' + - open +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are the sprinklers on? + context: + id: 01KHJQW85J607YFVV3S40ZRT63 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:10.674152+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:10.675898+00:00 + - role: user + content: Are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:10.674221+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4c32fb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:10.675908+00:00 + duration_ms: 26.164 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-9.yaml new file mode 100644 index 000000000..dde966951 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-are_the_sprinklers_on-9.yaml @@ -0,0 +1,275 @@ +--- +uuid: 90ca99f8-6189-4187-9d8b-ecc179659731 +task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-9 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Are the sprinklers on? + expect_response: + - 'yes' + - 'on' + - open +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are the sprinklers on? + context: + id: 01KHJQW8DWY8X6X94W9YB8RBHE + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:10.940978+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:10.967952+00:00 + - role: user + content: Are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:10.941050+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f45ac0f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:10.967967+00:00 + duration_ms: 55.276 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-0.yaml new file mode 100644 index 000000000..0e762bdc5 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-0.yaml @@ -0,0 +1,275 @@ +--- +uuid: d2d651db-3e0a-488c-9045-978de5f400a6 +task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-0 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Please tell me, are the sprinklers on? + expect_response: + - 'no' + - 'off' + - closed +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Please tell me, are the sprinklers on? + context: + id: 01KHJQW8Q2JHTMAJD9EWMGGNFN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:11.234169+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:11.236098+00:00 + - role: user + content: Please tell me, are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:11.234264+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1c317a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:11.236108+00:00 + duration_ms: 25.436 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-1.yaml new file mode 100644 index 000000000..c82bdf310 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-1.yaml @@ -0,0 +1,275 @@ +--- +uuid: b4c6004d-be31-49aa-aff0-bc06fbdc7e2e +task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-1 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Please tell me, are the sprinklers on? + expect_response: + - 'no' + - 'off' + - closed +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Please tell me, are the sprinklers on? + context: + id: 01KHJQW94XAJSC5Y2SCZHX435R + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:11.677818+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:11.679634+00:00 + - role: user + content: Please tell me, are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:11.677890+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4d54460>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:11.679644+00:00 + duration_ms: 24.96 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-2.yaml new file mode 100644 index 000000000..fac7c9065 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-2.yaml @@ -0,0 +1,275 @@ +--- +uuid: bab6a4f3-33d0-48fb-a9d4-831916aa43a0 +task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-2 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Please tell me, are the sprinklers on? + expect_response: + - 'no' + - 'off' + - closed +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Please tell me, are the sprinklers on? + context: + id: 01KHJQW9D4FG73PSHPWP3BAACK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:11.940439+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: light + areas: Garage + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:11.942360+00:00 + - role: user + content: Please tell me, are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:11.940519+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f48e8720>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:11.942370+00:00 + duration_ms: 27.757 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-3.yaml new file mode 100644 index 000000000..e0aea571b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-3.yaml @@ -0,0 +1,275 @@ +--- +uuid: 73d37a90-3c2f-4f89-bef0-fed17f02949c +task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-3 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Please tell me, are the sprinklers on? + expect_response: + - 'no' + - 'off' + - closed +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Please tell me, are the sprinklers on? + context: + id: 01KHJQW9N7YKDCKXGY68PH0SHY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:12.199696+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:12.201475+00:00 + - role: user + content: Please tell me, are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:12.199764+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f50bba00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:12.201484+00:00 + duration_ms: 24.013 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-4.yaml new file mode 100644 index 000000000..937abf414 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-4.yaml @@ -0,0 +1,275 @@ +--- +uuid: eeb87fb5-aadd-471e-b6cb-0cbf23ed44dc +task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-4 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Please tell me, are the sprinklers on? + expect_response: + - 'no' + - 'off' + - closed +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Please tell me, are the sprinklers on? + context: + id: 01KHJQW9WQGPB2RJJA8BCGXB5X + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:12.439182+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:12.442432+00:00 + - role: user + content: Please tell me, are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:12.439278+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4619e80>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:12.442443+00:00 + duration_ms: 34.195 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-5.yaml new file mode 100644 index 000000000..f4c511bd1 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-5.yaml @@ -0,0 +1,275 @@ +--- +uuid: 3bf13395-f0f2-4d4f-bbc5-c09e980bc930 +task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-5 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Please tell me, are the sprinklers on? + expect_response: + - 'no' + - 'off' + - closed +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Please tell me, are the sprinklers on? + context: + id: 01KHJQWA4WWZN85DS9PM5BH47X + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:12.700508+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:12.702404+00:00 + - role: user + content: Please tell me, are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:12.700579+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c0f94510>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:12.702414+00:00 + duration_ms: 32.371 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-6.yaml new file mode 100644 index 000000000..08e1d03c6 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-6.yaml @@ -0,0 +1,275 @@ +--- +uuid: 1e8d142c-51f7-46a1-b163-1d9a384af371 +task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-6 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Please tell me, are the sprinklers on? + expect_response: + - 'no' + - 'off' + - closed +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Please tell me, are the sprinklers on? + context: + id: 01KHJQWACCHYZC77PQ61PC1C72 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:12.940407+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:12.942275+00:00 + - role: user + content: Please tell me, are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:12.940477+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d4111590>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:12.942284+00:00 + duration_ms: 25.837 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-7.yaml new file mode 100644 index 000000000..bd13b2fba --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-7.yaml @@ -0,0 +1,275 @@ +--- +uuid: 42908ab2-3374-4747-85b2-909c67fee65a +task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-7 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Please tell me, are the sprinklers on? + expect_response: + - 'no' + - 'off' + - closed +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Please tell me, are the sprinklers on? + context: + id: 01KHJQWAN0H0GT4RRFFMSKM33J + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:13.216878+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:13.221187+00:00 + - role: user + content: Please tell me, are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:13.216948+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1f17690>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:13.221197+00:00 + duration_ms: 27.891 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-8.yaml new file mode 100644 index 000000000..00563c557 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-8.yaml @@ -0,0 +1,275 @@ +--- +uuid: a9456bdb-2788-435e-8509-861c547904e6 +task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-8 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Please tell me, are the sprinklers on? + expect_response: + - 'no' + - 'off' + - closed +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Please tell me, are the sprinklers on? + context: + id: 01KHJQWAWNP5QWEW0NCJMS773V + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:13.461782+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:13.463596+00:00 + - role: user + content: Please tell me, are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:13.461855+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4d57480>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:13.463605+00:00 + duration_ms: 23.597 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-9.yaml new file mode 100644 index 000000000..d43a06ed7 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-9.yaml @@ -0,0 +1,275 @@ +--- +uuid: 77716ca5-e7e3-47fb-ae08-b3a0db0e96e6 +task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-9 +model_id: gemma-3-27b-it +category: +- valve +- question +task: + input_text: Please tell me, are the sprinklers on? + expect_response: + - 'no' + - 'off' + - closed +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Please tell me, are the sprinklers on? + context: + id: 01KHJQWB4CPXSMDDP0M3SR82Z7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:13.708418+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Backyard Light + domain: light + areas: Backyard + - names: Dining Room Light + domain: light + areas: Dining Room + - names: Family Room Light + domain: light + areas: Family Room + - names: Front Yard Light + domain: light + areas: Front Yard + - names: Garage Door + domain: cover + areas: Garage + - names: Garage Door + domain: light + areas: Garage + - names: Garage Light + domain: light + areas: Garage + - names: Guest Room Light + domain: light + areas: Guest Room + - names: Guest Room Thermostat + domain: climate + areas: Guest Room + - names: Guest Room Thermostat Humidity + domain: sensor + areas: Guest Room + - names: Guest Room Thermostat Temperature + domain: sensor + areas: Guest Room + - names: Kids Room 1 Light + domain: light + areas: Kids Room 1 + - names: Kids Room 2 Light + domain: light + areas: Kids Room 2 + - names: Kitchen Appliance Plug + domain: switch + areas: Kitchen + - names: Kitchen Appliance Plug Energy + domain: sensor + areas: Kitchen + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Kitchen Sink + domain: valve + areas: Kitchen + - names: Kitchen Sink Battery + domain: sensor + areas: Kitchen + - names: Kitchen Sink Meter-reading + domain: sensor + areas: Kitchen + - names: Living Room Light + domain: light + areas: Living Room + - names: Master Bedroom Light + domain: light + areas: Master Bedroom + - names: Master Bedroom Thermostat + domain: climate + areas: Master Bedroom + - names: Master Bedroom Thermostat Humidity + domain: sensor + areas: Master Bedroom + - names: Master Bedroom Thermostat Temperature + domain: sensor + areas: Master Bedroom + - names: Refrigerator Generic + domain: sensor + areas: Kitchen + - names: Smart Speaker + domain: media_player + areas: Living Room + - names: Sprinklers + domain: valve + areas: Backyard + - names: Tv + domain: media_player + areas: Living Room + created: 2026-02-16 08:08:13.710302+00:00 + - role: user + content: Please tell me, are the sprinklers on? + attachments: null + created: 2026-02-16 08:08:13.708488+00:00 + tools: + - name: HassClimateSetTemperature + description: Sets the target temperature of a climate device or entity + parameters: '{''temperature'': Coerce(float, msg=None), ''area'': , ''name'': , ''floor'': }' + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c0f0f7f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassSetPosition + description: Sets the position of a device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None), ''position'': All(Coerce(int, msg=None), Range(min=0, + max=100, min_included=True, max_included=True, msg=None), msg=None)}' + - name: HassStopMoving + description: Stops a moving device or entity + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:13.710312+00:00 + duration_ms: 172.603 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-0.yaml new file mode 100644 index 000000000..c8e560324 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-0.yaml @@ -0,0 +1,224 @@ +--- +uuid: f69ab5b3-c5c9-4a03-a793-67b5907ae701 +task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-0 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my calendar, is anyone visiting? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my calendar, is anyone visiting? + context: + id: 01KHJQXADQCTTBSDK5X577RZ64 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:45.751937+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:45.753493+00:00 + - role: user + content: According to my calendar, is anyone visiting? + attachments: null + created: 2026-02-16 08:08:45.752007+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1b4c300>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:45.753502+00:00 + duration_ms: 23.188 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-1.yaml new file mode 100644 index 000000000..37fee9fe3 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-1.yaml @@ -0,0 +1,224 @@ +--- +uuid: a7af2631-8174-48a9-a555-175d04cea204 +task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-1 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my calendar, is anyone visiting? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my calendar, is anyone visiting? + context: + id: 01KHJQXAKNH2RG23JJP0B1G637 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:45.941361+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:45.942880+00:00 + - role: user + content: According to my calendar, is anyone visiting? + attachments: null + created: 2026-02-16 08:08:45.941433+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2a197a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:45.942890+00:00 + duration_ms: 23.737 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-2.yaml new file mode 100644 index 000000000..2732af135 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-2.yaml @@ -0,0 +1,224 @@ +--- +uuid: 475309ca-e6f9-4a18-8e76-d0e8fca7aa33 +task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-2 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my calendar, is anyone visiting? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my calendar, is anyone visiting? + context: + id: 01KHJQXASX0QQVAYKRWJHBM1AD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:46.141788+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:46.143344+00:00 + - role: user + content: According to my calendar, is anyone visiting? + attachments: null + created: 2026-02-16 08:08:46.141859+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3eaacf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:46.143354+00:00 + duration_ms: 22.746 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-3.yaml new file mode 100644 index 000000000..9893d87a0 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-3.yaml @@ -0,0 +1,224 @@ +--- +uuid: 60fa7ce9-ce93-46af-8a52-5aaf0eb79a9b +task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-3 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my calendar, is anyone visiting? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my calendar, is anyone visiting? + context: + id: 01KHJQXB0WW4X87YBTWHS3PY5B + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:46.364713+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:46.366269+00:00 + - role: user + content: According to my calendar, is anyone visiting? + attachments: null + created: 2026-02-16 08:08:46.364782+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4883060>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:46.366279+00:00 + duration_ms: 27.322 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-4.yaml new file mode 100644 index 000000000..1e485e7e6 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-4.yaml @@ -0,0 +1,224 @@ +--- +uuid: 0b4a912a-33e8-40c1-a17e-199a097eb550 +task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-4 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my calendar, is anyone visiting? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my calendar, is anyone visiting? + context: + id: 01KHJQXB6X4NHQNYE781NAHR65 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:46.557217+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:46.564081+00:00 + - role: user + content: According to my calendar, is anyone visiting? + attachments: null + created: 2026-02-16 08:08:46.557310+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1b4ce00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:46.564094+00:00 + duration_ms: 33.642 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-5.yaml new file mode 100644 index 000000000..2c1944b5f --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-5.yaml @@ -0,0 +1,224 @@ +--- +uuid: cbca2be5-2f6a-47a0-bd18-eca0209ad4c4 +task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-5 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my calendar, is anyone visiting? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my calendar, is anyone visiting? + context: + id: 01KHJQXBD9RNEDE5J7MKCX5X4H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:46.761407+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:46.767920+00:00 + - role: user + content: According to my calendar, is anyone visiting? + attachments: null + created: 2026-02-16 08:08:46.761476+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c0f09b10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:46.767931+00:00 + duration_ms: 41.085 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-6.yaml new file mode 100644 index 000000000..25c69c3af --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-6.yaml @@ -0,0 +1,224 @@ +--- +uuid: 19301e2a-7c28-4844-b170-5ebdacd3ffe3 +task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-6 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my calendar, is anyone visiting? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my calendar, is anyone visiting? + context: + id: 01KHJQXBM3GQMYABXPS4WWX4CT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:46.979569+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:46.981085+00:00 + - role: user + content: According to my calendar, is anyone visiting? + attachments: null + created: 2026-02-16 08:08:46.979638+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f611e820>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:46.981095+00:00 + duration_ms: 22.157 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-7.yaml new file mode 100644 index 000000000..6e15f85af --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-7.yaml @@ -0,0 +1,224 @@ +--- +uuid: 46179d8f-abcc-4c27-aa28-4de4d7ba53e8 +task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-7 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my calendar, is anyone visiting? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my calendar, is anyone visiting? + context: + id: 01KHJQXBT9YH1W6AFM056HSHF0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:47.177406+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:47.184681+00:00 + - role: user + content: According to my calendar, is anyone visiting? + attachments: null + created: 2026-02-16 08:08:47.177475+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2865640>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:47.184692+00:00 + duration_ms: 30.436 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-8.yaml new file mode 100644 index 000000000..2bd616955 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-8.yaml @@ -0,0 +1,224 @@ +--- +uuid: 49de3264-bb51-47ef-8f4e-6246b9a322c0 +task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-8 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my calendar, is anyone visiting? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my calendar, is anyone visiting? + context: + id: 01KHJQXC0QZXY54JTFN953PAYH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:47.383888+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:47.385456+00:00 + - role: user + content: According to my calendar, is anyone visiting? + attachments: null + created: 2026-02-16 08:08:47.383959+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4e78510>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:47.385467+00:00 + duration_ms: 27.331 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-9.yaml new file mode 100644 index 000000000..34bb4f14c --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-9.yaml @@ -0,0 +1,224 @@ +--- +uuid: b70a9070-63e6-4f58-b391-9c930a64af57 +task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-9 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my calendar, is anyone visiting? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my calendar, is anyone visiting? + context: + id: 01KHJQXC7C2EWPD6RY4GHBRA1S + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:47.596479+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:47.599557+00:00 + - role: user + content: According to my calendar, is anyone visiting? + attachments: null + created: 2026-02-16 08:08:47.596550+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1825bc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:47.599568+00:00 + duration_ms: 29.687 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-0.yaml new file mode 100644 index 000000000..9d8f7c220 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-0.yaml @@ -0,0 +1,235 @@ +--- +uuid: 80e69a37-4aa2-4c61-9660-8def946b5a5d +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-0 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, are we leaving the house today? + context: + id: 01KHJQWYENX2N0H31V6TA8GMHY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:33.493969+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:33.497002+00:00 + - role: user + content: According to my personal calendar, are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:33.494040+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2fb05c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:33.497011+00:00 + duration_ms: 25.214 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-1.yaml new file mode 100644 index 000000000..594470f8e --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-1.yaml @@ -0,0 +1,235 @@ +--- +uuid: 91c43aa1-f687-4ed4-a8d9-fcd6ebc0e689 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-1 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, are we leaving the house today? + context: + id: 01KHJQWYMZ58S6DH5CMBND25V3 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:33.695574+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:33.697752+00:00 + - role: user + content: According to my personal calendar, are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:33.695645+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f48bd220>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:33.697761+00:00 + duration_ms: 25.214 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-2.yaml new file mode 100644 index 000000000..da49afad5 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-2.yaml @@ -0,0 +1,235 @@ +--- +uuid: 88ac4993-4d08-43a3-90f4-20bf17d32e3b +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-2 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, are we leaving the house today? + context: + id: 01KHJQWYV2GDQZCZ9JFQNRG9XS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:33.890211+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:33.893213+00:00 + - role: user + content: According to my personal calendar, are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:33.890308+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d41e2350>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:33.893223+00:00 + duration_ms: 24.661 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-3.yaml new file mode 100644 index 000000000..50e6bed87 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-3.yaml @@ -0,0 +1,235 @@ +--- +uuid: fc5b0aaf-ff34-463b-ba5f-e46d32725a84 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-3 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, are we leaving the house today? + context: + id: 01KHJQWZ14NXA3DSEWP78HTPMT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:34.085009+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:34.086583+00:00 + - role: user + content: According to my personal calendar, are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:34.085080+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d43768d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:34.086592+00:00 + duration_ms: 173.949 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-4.yaml new file mode 100644 index 000000000..75f68d908 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-4.yaml @@ -0,0 +1,235 @@ +--- +uuid: afc7a0c4-bc36-47c2-a1d2-908ac5d71a54 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-4 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, are we leaving the house today? + context: + id: 01KHJQWZDJ9A67QTXC3NH3HP8W + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:34.482582+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:34.484194+00:00 + - role: user + content: According to my personal calendar, are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:34.482658+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1819c70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:34.484202+00:00 + duration_ms: 23.348 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-5.yaml new file mode 100644 index 000000000..dcebaedc3 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-5.yaml @@ -0,0 +1,235 @@ +--- +uuid: f24323e9-8463-4bc1-88c0-965d1ef2f335 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-5 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, are we leaving the house today? + context: + id: 01KHJQWZKMZE8X16S76BXTCQ1K + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:34.676776+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:34.678370+00:00 + - role: user + content: According to my personal calendar, are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:34.676850+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4c4dfe0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:34.678379+00:00 + duration_ms: 53.901 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-6.yaml new file mode 100644 index 000000000..f9d3e8fc8 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-6.yaml @@ -0,0 +1,235 @@ +--- +uuid: 600bb546-6ec4-4af7-94fa-fde825fe34d8 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-6 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, are we leaving the house today? + context: + id: 01KHJQWZT9QRKTPET9FMAMY5FG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:34.889276+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:34.890843+00:00 + - role: user + content: According to my personal calendar, are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:34.889350+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1644e00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:34.890852+00:00 + duration_ms: 174.996 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-7.yaml new file mode 100644 index 000000000..1172fcf5e --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-7.yaml @@ -0,0 +1,235 @@ +--- +uuid: 4160622e-aaed-4ac6-96be-e4ca145e9c52 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-7 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, are we leaving the house today? + context: + id: 01KHJQX053H6ADMAKDSB1F738N + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:35.235111+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:35.236612+00:00 + - role: user + content: According to my personal calendar, are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:35.235182+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c27c50c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:35.236622+00:00 + duration_ms: 29.339 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-8.yaml new file mode 100644 index 000000000..817ffe1e3 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-8.yaml @@ -0,0 +1,235 @@ +--- +uuid: 3164ddb4-f3df-4184-8679-17e035cbb419 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-8 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, are we leaving the house today? + context: + id: 01KHJQX0BFTAE46QZ8FVCXGPVY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:35.439593+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:35.444895+00:00 + - role: user + content: According to my personal calendar, are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:35.439664+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c0eee090>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:35.444906+00:00 + duration_ms: 25.635 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-9.yaml new file mode 100644 index 000000000..3195e0f76 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-9.yaml @@ -0,0 +1,235 @@ +--- +uuid: e239e89b-f443-40b7-8ab9-c461dd0703af +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-9 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, are we leaving the house today? + context: + id: 01KHJQX0HZ0811FN184EPCF1HB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:35.647231+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:35.649144+00:00 + - role: user + content: According to my personal calendar, are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:35.647343+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4f83110>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:35.649154+00:00 + duration_ms: 29.419 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-0.yaml new file mode 100644 index 000000000..36f1416b7 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-0.yaml @@ -0,0 +1,238 @@ +--- +uuid: 3bbd35a3-47b5-47a7-b168-83d6b16ba033 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-0 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, do I have any events away from home + today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, do I have any events away from home + today? + context: + id: 01KHJQX0RYT77YX6JGVQCK5MET + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:35.870927+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:35.872846+00:00 + - role: user + content: According to my personal calendar, do I have any events away from + home today? + attachments: null + created: 2026-02-16 08:08:35.870997+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1adb5e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:35.872857+00:00 + duration_ms: 29.012 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-1.yaml new file mode 100644 index 000000000..10dfb1ff0 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-1.yaml @@ -0,0 +1,238 @@ +--- +uuid: 70ad1707-23f5-4605-bfc7-5c862826b4e2 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-1 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, do I have any events away from home + today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, do I have any events away from home + today? + context: + id: 01KHJQX114BMVXWVZ45X2V51VF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:36.132925+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:36.134413+00:00 + - role: user + content: According to my personal calendar, do I have any events away from + home today? + attachments: null + created: 2026-02-16 08:08:36.132997+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2ac1d20>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:36.134423+00:00 + duration_ms: 24.286 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-2.yaml new file mode 100644 index 000000000..bb6905574 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-2.yaml @@ -0,0 +1,238 @@ +--- +uuid: f11d41ec-8679-4e0c-9d13-9ff0b7642f36 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-2 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, do I have any events away from home + today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, do I have any events away from home + today? + context: + id: 01KHJQX17X15NWPAAD4XAA7NQS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:36.349132+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:36.351031+00:00 + - role: user + content: According to my personal calendar, do I have any events away from + home today? + attachments: null + created: 2026-02-16 08:08:36.349204+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2da5640>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:36.351041+00:00 + duration_ms: 39.173 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-3.yaml new file mode 100644 index 000000000..6ee75b719 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-3.yaml @@ -0,0 +1,238 @@ +--- +uuid: 45ed2c74-3d12-4b3b-b49f-be50bace913f +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-3 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, do I have any events away from home + today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, do I have any events away from home + today? + context: + id: 01KHJQX1ESA1FGDMCHQDS3G4QR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:36.569112+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:36.571936+00:00 + - role: user + content: According to my personal calendar, do I have any events away from + home today? + attachments: null + created: 2026-02-16 08:08:36.569181+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1be7cc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:36.571946+00:00 + duration_ms: 32.119 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-4.yaml new file mode 100644 index 000000000..410b4cd06 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-4.yaml @@ -0,0 +1,238 @@ +--- +uuid: ce292068-ca49-487d-910c-bff4a0478397 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-4 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, do I have any events away from home + today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, do I have any events away from home + today? + context: + id: 01KHJQX22EZ32D5Y6K6TYTPSPH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:37.199051+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:37.202006+00:00 + - role: user + content: According to my personal calendar, do I have any events away from + home today? + attachments: null + created: 2026-02-16 08:08:37.199125+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f50ce140>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:37.202016+00:00 + duration_ms: 169.378 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-5.yaml new file mode 100644 index 000000000..a29a1b8af --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-5.yaml @@ -0,0 +1,238 @@ +--- +uuid: 2cd7b4e7-e1f5-45b1-b4f6-638003c1d413 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-5 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, do I have any events away from home + today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, do I have any events away from home + today? + context: + id: 01KHJQX2ETA2WVVGY61HGJA2P8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:37.594835+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:37.597856+00:00 + - role: user + content: According to my personal calendar, do I have any events away from + home today? + attachments: null + created: 2026-02-16 08:08:37.594907+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1cc21f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:37.597867+00:00 + duration_ms: 30.245 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-6.yaml new file mode 100644 index 000000000..e2e23ef3b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-6.yaml @@ -0,0 +1,238 @@ +--- +uuid: fae2c7cf-d08e-4836-8912-17bb0a6d7b5b +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-6 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, do I have any events away from home + today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, do I have any events away from home + today? + context: + id: 01KHJQX2P6SFKP6P5RK0V62B4H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:37.830435+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:37.833540+00:00 + - role: user + content: According to my personal calendar, do I have any events away from + home today? + attachments: null + created: 2026-02-16 08:08:37.830507+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1bf2e50>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:37.833551+00:00 + duration_ms: 25.523 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-7.yaml new file mode 100644 index 000000000..cca2ba44d --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-7.yaml @@ -0,0 +1,238 @@ +--- +uuid: 5416bfc8-834e-47fe-bb09-075cc72e8d74 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-7 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, do I have any events away from home + today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, do I have any events away from home + today? + context: + id: 01KHJQX2X8NZ21E55TQ063VBEB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:38.056512+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:38.058077+00:00 + - role: user + content: According to my personal calendar, do I have any events away from + home today? + attachments: null + created: 2026-02-16 08:08:38.056583+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2a54b40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:38.058087+00:00 + duration_ms: 24.66 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-8.yaml new file mode 100644 index 000000000..63e42b7d1 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-8.yaml @@ -0,0 +1,238 @@ +--- +uuid: ac5939ce-0a61-49f6-9782-c228c08a42a9 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-8 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, do I have any events away from home + today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, do I have any events away from home + today? + context: + id: 01KHJQX33JK9TSWNV2VKYZKA77 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:38.258275+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:38.261407+00:00 + - role: user + content: According to my personal calendar, do I have any events away from + home today? + attachments: null + created: 2026-02-16 08:08:38.258350+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f48ca400>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:38.261418+00:00 + duration_ms: 25.901 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-9.yaml new file mode 100644 index 000000000..af74b987b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-9.yaml @@ -0,0 +1,238 @@ +--- +uuid: 2b4544fe-68a1-4820-8dbf-2d6743ffb78b +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-9 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, do I have any events away from home + today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, do I have any events away from home + today? + context: + id: 01KHJQX39VMK5EBYQ3SEH3RH4S + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:38.459889+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:38.461755+00:00 + - role: user + content: According to my personal calendar, do I have any events away from + home today? + attachments: null + created: 2026-02-16 08:08:38.459959+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c27cf3d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:38.461765+00:00 + duration_ms: 25.914 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-0.yaml new file mode 100644 index 000000000..81027d98c --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-0.yaml @@ -0,0 +1,224 @@ +--- +uuid: 620a2cb2-55ec-4f2a-9f95-50af5f0fbdc6 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-0 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, who am I meeting for dinner? + context: + id: 01KHJQWK3ZKCPTRYYHP3ZRBEG5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:21.887374+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:21.889201+00:00 + - role: user + content: According to my personal calendar, who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:21.887444+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c0e6d2d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:21.889211+00:00 + duration_ms: 39.954 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-1.yaml new file mode 100644 index 000000000..9800cb782 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-1.yaml @@ -0,0 +1,224 @@ +--- +uuid: 376820dc-e10b-4f2d-bdc0-017499578999 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-1 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, who am I meeting for dinner? + context: + id: 01KHJQWKATWGE0SXZX48R8QAKD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:22.106526+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:22.107997+00:00 + - role: user + content: According to my personal calendar, who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:22.106596+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c19dc930>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:22.108007+00:00 + duration_ms: 168.748 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-2.yaml new file mode 100644 index 000000000..bfa70696b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-2.yaml @@ -0,0 +1,224 @@ +--- +uuid: f24816ae-bd19-49cb-81b6-04f5b3d00087 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-2 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, who am I meeting for dinner? + context: + id: 01KHJQWKP19PC4HCK6EDVCQ0GS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:22.465605+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:22.470536+00:00 + - role: user + content: According to my personal calendar, who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:22.465674+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f50cb3d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:22.470549+00:00 + duration_ms: 27.667 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-3.yaml new file mode 100644 index 000000000..80785a43f --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-3.yaml @@ -0,0 +1,224 @@ +--- +uuid: bb2faa3a-7915-4c3a-aacf-4a1dfd78afd8 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-3 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, who am I meeting for dinner? + context: + id: 01KHJQWKWMMVWW1ESPJ3E09FGW + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:22.676196+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:22.677770+00:00 + - role: user + content: According to my personal calendar, who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:22.676292+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d4382350>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:22.677780+00:00 + duration_ms: 173.175 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-4.yaml new file mode 100644 index 000000000..ec703b90e --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-4.yaml @@ -0,0 +1,224 @@ +--- +uuid: 8b5c8d96-3e53-4a28-9f89-81c08807d9fd +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-4 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, who am I meeting for dinner? + context: + id: 01KHJQWM88AKH3ZEMR7NFE91GM + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:23.048102+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:23.054814+00:00 + - role: user + content: According to my personal calendar, who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:23.048172+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4ac4eb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:23.054825+00:00 + duration_ms: 32.496 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-5.yaml new file mode 100644 index 000000000..ce93a6a81 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-5.yaml @@ -0,0 +1,224 @@ +--- +uuid: e7195cc4-5ee9-4896-83eb-585f880006ed +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-5 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, who am I meeting for dinner? + context: + id: 01KHJQWMEQFXGGR39GZGVRSKJR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:23.255405+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:23.258098+00:00 + - role: user + content: According to my personal calendar, who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:23.255475+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4ac45c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:23.258108+00:00 + duration_ms: 167.154 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-6.yaml new file mode 100644 index 000000000..5684c4418 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-6.yaml @@ -0,0 +1,224 @@ +--- +uuid: 7b3b5dba-b98f-4059-b8ee-11c82c272b6e +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-6 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, who am I meeting for dinner? + context: + id: 01KHJQWMV76JQX7DBJXDZJGJXG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:23.655431+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:23.657004+00:00 + - role: user + content: According to my personal calendar, who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:23.655501+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f49cd640>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:23.657014+00:00 + duration_ms: 174.272 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-7.yaml new file mode 100644 index 000000000..7d556d8be --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-7.yaml @@ -0,0 +1,224 @@ +--- +uuid: 1cb9e237-b760-4fa8-a8d0-2ba83a0fb31e +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-7 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, who am I meeting for dinner? + context: + id: 01KHJQWN6G3JMSKEYD7V7WRGK3 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:24.016503+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:24.018041+00:00 + - role: user + content: According to my personal calendar, who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:24.016574+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f492c670>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:24.018051+00:00 + duration_ms: 177.48 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-8.yaml new file mode 100644 index 000000000..6bc773859 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-8.yaml @@ -0,0 +1,224 @@ +--- +uuid: 8736402d-db37-443e-b9a6-563c1b0936b1 +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-8 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, who am I meeting for dinner? + context: + id: 01KHJQWNHNGB8F9D2247DH23MF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:24.373807+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:24.375299+00:00 + - role: user + content: According to my personal calendar, who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:24.373879+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f48ea610>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:24.375308+00:00 + duration_ms: 170.834 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-9.yaml new file mode 100644 index 000000000..e1b022a39 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-9.yaml @@ -0,0 +1,224 @@ +--- +uuid: bcf3ccef-ed7d-4c75-b302-8b59b44aca1d +task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-9 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: According to my personal calendar, who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: According to my personal calendar, who am I meeting for dinner? + context: + id: 01KHJQWNWY3KAJP2Z5YTX4KVC5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:24.734445+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:24.735919+00:00 + - role: user + content: According to my personal calendar, who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:24.734515+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4944b40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:24.735929+00:00 + duration_ms: 173.536 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-0.yaml new file mode 100644 index 000000000..8bfe5e615 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-0.yaml @@ -0,0 +1,227 @@ +--- +uuid: 1a3387bf-f477-4795-af4b-9091eb34fb10 +task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-0 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Am I leaving the house today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Am I leaving the house today? + context: + id: 01KHJQWPGEHW287XMA8HMP8SC0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:25.358897+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:25.360353+00:00 + - role: user + content: Am I leaving the house today? + attachments: null + created: 2026-02-16 08:08:25.358967+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1610d50>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:25.360363+00:00 + duration_ms: 22.292 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-1.yaml new file mode 100644 index 000000000..0c7a1e55e --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-1.yaml @@ -0,0 +1,227 @@ +--- +uuid: fa2e2c1e-0132-4e7a-857e-ca043bfedf62 +task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-1 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Am I leaving the house today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Am I leaving the house today? + context: + id: 01KHJQWPPEM0ZKNBG8633Q0Q4Q + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:25.550980+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:25.552459+00:00 + - role: user + content: Am I leaving the house today? + attachments: null + created: 2026-02-16 08:08:25.551050+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f492f530>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:25.552469+00:00 + duration_ms: 23.167 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-2.yaml new file mode 100644 index 000000000..d46a6eaf7 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-2.yaml @@ -0,0 +1,227 @@ +--- +uuid: b1922853-894e-43ca-ae89-5f0a30380d3a +task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-2 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Am I leaving the house today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Am I leaving the house today? + context: + id: 01KHJQWPWEN64VA9HFN6XCV2FW + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:25.742518+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:25.744380+00:00 + - role: user + content: Am I leaving the house today? + attachments: null + created: 2026-02-16 08:08:25.742588+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f61056f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:25.744390+00:00 + duration_ms: 23.812 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-3.yaml new file mode 100644 index 000000000..63140381d --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-3.yaml @@ -0,0 +1,227 @@ +--- +uuid: c0593972-b1c3-49fa-890c-51da3ffce324 +task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-3 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Am I leaving the house today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Am I leaving the house today? + context: + id: 01KHJQWQ2ZD2XY87KB332R9T8H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:25.951232+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:25.954090+00:00 + - role: user + content: Am I leaving the house today? + attachments: null + created: 2026-02-16 08:08:25.951327+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f627b320>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:25.954100+00:00 + duration_ms: 179.019 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-4.yaml new file mode 100644 index 000000000..ea45f419f --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-4.yaml @@ -0,0 +1,227 @@ +--- +uuid: c2aa7490-4341-4067-970f-f1761ff1b2e8 +task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-4 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Am I leaving the house today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Am I leaving the house today? + context: + id: 01KHJQWQEEC7J46TV2A27HMH5G + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:26.318546+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:26.321341+00:00 + - role: user + content: Am I leaving the house today? + attachments: null + created: 2026-02-16 08:08:26.318615+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f47bd010>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:26.321351+00:00 + duration_ms: 181.474 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-5.yaml new file mode 100644 index 000000000..1bc3d504a --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-5.yaml @@ -0,0 +1,227 @@ +--- +uuid: 4b6e08f1-3227-4c67-b347-211c81bf2129 +task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-5 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Am I leaving the house today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Am I leaving the house today? + context: + id: 01KHJQWQT81FWEEQJA9B9VE9WR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:26.696392+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:26.697947+00:00 + - role: user + content: Am I leaving the house today? + attachments: null + created: 2026-02-16 08:08:26.696465+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4615900>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:26.697957+00:00 + duration_ms: 22.866 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-6.yaml new file mode 100644 index 000000000..61f0888fc --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-6.yaml @@ -0,0 +1,227 @@ +--- +uuid: c0bdf497-087e-4e95-b5bc-7ecf7fc8d57a +task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-6 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Am I leaving the house today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Am I leaving the house today? + context: + id: 01KHJQWR11F5SX4ACAGQNW37S7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:26.913277+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:26.914935+00:00 + - role: user + content: Am I leaving the house today? + attachments: null + created: 2026-02-16 08:08:26.913364+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1c07690>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:26.914945+00:00 + duration_ms: 30.336 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-7.yaml new file mode 100644 index 000000000..d650bd5e8 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-7.yaml @@ -0,0 +1,227 @@ +--- +uuid: dd2979e6-15ef-4e69-ad96-41ce08de3312 +task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-7 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Am I leaving the house today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Am I leaving the house today? + context: + id: 01KHJQWR7MXXR3PN5HX1GPQF9Z + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:27.124543+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:27.125988+00:00 + - role: user + content: Am I leaving the house today? + attachments: null + created: 2026-02-16 08:08:27.124612+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2cb6610>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:27.125998+00:00 + duration_ms: 22.46 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-8.yaml new file mode 100644 index 000000000..62d455c84 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-8.yaml @@ -0,0 +1,227 @@ +--- +uuid: 6ea3e3ad-dbbb-4ef6-be4a-257227123384 +task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-8 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Am I leaving the house today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Am I leaving the house today? + context: + id: 01KHJQWRDHM2PZS489STSGPXM7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:27.313469+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:27.315317+00:00 + - role: user + content: Am I leaving the house today? + attachments: null + created: 2026-02-16 08:08:27.313537+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d4213530>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:27.315327+00:00 + duration_ms: 25.203 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-9.yaml new file mode 100644 index 000000000..89e650fa6 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-am_i_leaving_the_house_today-9.yaml @@ -0,0 +1,227 @@ +--- +uuid: 425be8bf-548c-46fc-baab-5e46a2224a95 +task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-9 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Am I leaving the house today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Am I leaving the house today? + context: + id: 01KHJQWRKWGRKB8KVMVGMM6P5C + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:27.516989+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:27.523594+00:00 + - role: user + content: Am I leaving the house today? + attachments: null + created: 2026-02-16 08:08:27.517059+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1dabc10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:27.523604+00:00 + duration_ms: 176.821 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-0.yaml new file mode 100644 index 000000000..a79c57acb --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-0.yaml @@ -0,0 +1,235 @@ +--- +uuid: 996d1d45-b172-4ff0-888f-8bb66d6c362a +task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-0 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are we leaving the house today? + context: + id: 01KHJQWVEWDNTWH4C5JSE0BHPT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:30.428987+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:30.431453+00:00 + - role: user + content: Are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:30.429056+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c19778a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:30.431463+00:00 + duration_ms: 28.384 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-1.yaml new file mode 100644 index 000000000..f75779b48 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-1.yaml @@ -0,0 +1,235 @@ +--- +uuid: dfc08a19-a26b-403c-b6a2-9201d4562fd7 +task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-1 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are we leaving the house today? + context: + id: 01KHJQWVY9SJRMAYH28B07J27G + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:30.921673+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:30.923096+00:00 + - role: user + content: Are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:30.921742+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1cc12d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:30.923105+00:00 + duration_ms: 28.661 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-2.yaml new file mode 100644 index 000000000..61c568cdd --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-2.yaml @@ -0,0 +1,235 @@ +--- +uuid: f9e1e9af-36f9-4a71-a70d-90c840e09b64 +task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-2 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are we leaving the house today? + context: + id: 01KHJQWW4J1N7CDVS5B09Y837R + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:31.122990+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:31.124563+00:00 + - role: user + content: Are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:31.123060+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f45da560>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:31.124573+00:00 + duration_ms: 168.523 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-3.yaml new file mode 100644 index 000000000..f4f7deacc --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-3.yaml @@ -0,0 +1,235 @@ +--- +uuid: aeee58c3-9c27-4a5c-bbf3-374e0eaf0dcb +task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-3 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are we leaving the house today? + context: + id: 01KHJQWWFRKMRKZ94TBR954RMF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:31.480282+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:31.483588+00:00 + - role: user + content: Are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:31.480359+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1bba1f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:31.483599+00:00 + duration_ms: 177.159 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-4.yaml new file mode 100644 index 000000000..800773d9a --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-4.yaml @@ -0,0 +1,235 @@ +--- +uuid: 7d5f9a20-8711-49f8-9053-fcc5700eaf06 +task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-4 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are we leaving the house today? + context: + id: 01KHJQWWVGH62A25YVC4CQE5GC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:31.856867+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:31.859650+00:00 + - role: user + content: Are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:31.856940+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4fdbb60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:31.859660+00:00 + duration_ms: 27.123 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-5.yaml new file mode 100644 index 000000000..534bb4042 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-5.yaml @@ -0,0 +1,235 @@ +--- +uuid: d00b0207-25a3-4547-9461-61422e471938 +task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-5 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are we leaving the house today? + context: + id: 01KHJQWX45E2ZFAFNKA1DD29Q8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:32.133858+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:32.135509+00:00 + - role: user + content: Are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:32.133931+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2d32140>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:32.135519+00:00 + duration_ms: 29.506 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-6.yaml new file mode 100644 index 000000000..40ed77699 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-6.yaml @@ -0,0 +1,235 @@ +--- +uuid: 9b4266fe-04d9-4340-834a-1dbd75cd281a +task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-6 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are we leaving the house today? + context: + id: 01KHJQWXB8T51DWZW1JTKER8QC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:32.360484+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:32.361950+00:00 + - role: user + content: Are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:32.360554+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c294d590>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:32.361959+00:00 + duration_ms: 41.01 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-7.yaml new file mode 100644 index 000000000..a841fb2a1 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-7.yaml @@ -0,0 +1,235 @@ +--- +uuid: 62b183a0-24ce-4937-8c79-6a9b17b965f2 +task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-7 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are we leaving the house today? + context: + id: 01KHJQWXHTRJE2BCWA019NBKM5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:32.570228+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:32.576744+00:00 + - role: user + content: Are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:32.570364+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c28887d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:32.576755+00:00 + duration_ms: 27.66 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-8.yaml new file mode 100644 index 000000000..9a9cef55b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-8.yaml @@ -0,0 +1,235 @@ +--- +uuid: 6e7adc4b-82ad-45ff-99e6-1ccc2fc7322e +task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-8 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are we leaving the house today? + context: + id: 01KHJQWXRFJ2YYY4GWW7FH7VX5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:32.783326+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:32.784750+00:00 + - role: user + content: Are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:32.783396+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f45b80f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:32.784759+00:00 + duration_ms: 172.912 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-9.yaml new file mode 100644 index 000000000..1e6cdb569 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-are_we_leaving_the_house_today-9.yaml @@ -0,0 +1,235 @@ +--- +uuid: 106ffaf8-2060-4baf-989e-087fa5a156c2 +task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-9 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Are we leaving the house today? + expect_response: + - 'no' + - nothing + - not leaving + - may not be leaving + - do not have any events + - don't have any events + - do not see + - don't see + - does not seem + - doesn't seem + - at home + - no plans +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Are we leaving the house today? + context: + id: 01KHJQWY3M67Y9VG8C2SF1NT0G + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:33.140174+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:33.142083+00:00 + - role: user + content: Are we leaving the house today? + attachments: null + created: 2026-02-16 08:08:33.140265+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d4276820>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:33.142094+00:00 + duration_ms: 168.778 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-0.yaml new file mode 100644 index 000000000..179c773ad --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-0.yaml @@ -0,0 +1,227 @@ +--- +uuid: 6f4f774b-113a-4e46-bb52-e96947f50895 +task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-0 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Do i have any personal calendar events away from home today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Do i have any personal calendar events away from home today? + context: + id: 01KHJQWRYX8P1K4X3MSZTKHCCN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:27.869517+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:27.876080+00:00 + - role: user + content: Do i have any personal calendar events away from home today? + attachments: null + created: 2026-02-16 08:08:27.869588+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2fedf30>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:27.876092+00:00 + duration_ms: 175.793 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-1.yaml new file mode 100644 index 000000000..af234788b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-1.yaml @@ -0,0 +1,227 @@ +--- +uuid: 9648bd7b-51af-4fcb-8be9-f2e73e45802a +task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-1 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Do i have any personal calendar events away from home today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Do i have any personal calendar events away from home today? + context: + id: 01KHJQWS9SGP874BP4PP9AX8ZZ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:28.217961+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:28.222633+00:00 + - role: user + content: Do i have any personal calendar events away from home today? + attachments: null + created: 2026-02-16 08:08:28.218032+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f49f0b40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:28.222643+00:00 + duration_ms: 27.318 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-2.yaml new file mode 100644 index 000000000..bc0e5f1bb --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-2.yaml @@ -0,0 +1,227 @@ +--- +uuid: 0078063a-0256-4f33-87f8-92089e935c92 +task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-2 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Do i have any personal calendar events away from home today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Do i have any personal calendar events away from home today? + context: + id: 01KHJQWSGMP19F92PTHSMHWKNZ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:28.437076+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:28.439058+00:00 + - role: user + content: Do i have any personal calendar events away from home today? + attachments: null + created: 2026-02-16 08:08:28.437147+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4bba980>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:28.439068+00:00 + duration_ms: 25.08 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-3.yaml new file mode 100644 index 000000000..c1bee9a57 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-3.yaml @@ -0,0 +1,227 @@ +--- +uuid: a472f683-72e5-4dbe-8ca1-cb6a8e3f3ce8 +task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-3 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Do i have any personal calendar events away from home today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Do i have any personal calendar events away from home today? + context: + id: 01KHJQWSQEGC5PMRSTGSV0KHV8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:28.654450+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:28.656909+00:00 + - role: user + content: Do i have any personal calendar events away from home today? + attachments: null + created: 2026-02-16 08:08:28.654525+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4dab480>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:28.656919+00:00 + duration_ms: 173.949 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-4.yaml new file mode 100644 index 000000000..fc5a85657 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-4.yaml @@ -0,0 +1,227 @@ +--- +uuid: 0277f010-ee0d-4721-a259-66ee3e3d895f +task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-4 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Do i have any personal calendar events away from home today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Do i have any personal calendar events away from home today? + context: + id: 01KHJQWT2N1ZCKDY4V5XF1CMK5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:29.013165+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:29.016040+00:00 + - role: user + content: Do i have any personal calendar events away from home today? + attachments: null + created: 2026-02-16 08:08:29.013237+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4b53060>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:29.016050+00:00 + duration_ms: 29.519 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-5.yaml new file mode 100644 index 000000000..ca980abd4 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-5.yaml @@ -0,0 +1,227 @@ +--- +uuid: 896b8e73-aba4-42cd-b6be-186efd1dfe25 +task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-5 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Do i have any personal calendar events away from home today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Do i have any personal calendar events away from home today? + context: + id: 01KHJQWTA650Y51GFZ9SKTHN2W + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:29.254864+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:29.256463+00:00 + - role: user + content: Do i have any personal calendar events away from home today? + attachments: null + created: 2026-02-16 08:08:29.254935+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2c0e140>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:29.256472+00:00 + duration_ms: 178.801 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-6.yaml new file mode 100644 index 000000000..3dcf169c4 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-6.yaml @@ -0,0 +1,227 @@ +--- +uuid: 6b4030d7-453c-4780-8f3a-c2cb6d80c49d +task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-6 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Do i have any personal calendar events away from home today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Do i have any personal calendar events away from home today? + context: + id: 01KHJQWTN7VTD074KZD6BGQ3V9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:29.607670+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:29.609231+00:00 + - role: user + content: Do i have any personal calendar events away from home today? + attachments: null + created: 2026-02-16 08:08:29.607742+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f48e8a90>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:29.609255+00:00 + duration_ms: 24.265 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-7.yaml new file mode 100644 index 000000000..293de8fd8 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-7.yaml @@ -0,0 +1,227 @@ +--- +uuid: f037d739-1c63-42b5-ad96-aa0ef94d87a3 +task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-7 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Do i have any personal calendar events away from home today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Do i have any personal calendar events away from home today? + context: + id: 01KHJQWTVCW298Z4AXENDWPJD5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:29.804230+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:29.805748+00:00 + - role: user + content: Do i have any personal calendar events away from home today? + attachments: null + created: 2026-02-16 08:08:29.804329+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2a13530>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:29.805757+00:00 + duration_ms: 22.137 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-8.yaml new file mode 100644 index 000000000..6e5b61bc0 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-8.yaml @@ -0,0 +1,227 @@ +--- +uuid: e86e0956-4c97-49f8-8bea-30f27d885002 +task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-8 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Do i have any personal calendar events away from home today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Do i have any personal calendar events away from home today? + context: + id: 01KHJQWV1TNSQFD5PH0R2VT77A + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:30.011037+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:30.015155+00:00 + - role: user + content: Do i have any personal calendar events away from home today? + attachments: null + created: 2026-02-16 08:08:30.011113+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1b662a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:30.015166+00:00 + duration_ms: 34.82 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-9.yaml new file mode 100644 index 000000000..817990850 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-9.yaml @@ -0,0 +1,227 @@ +--- +uuid: 24d90c59-9ff2-4109-ab0c-d0dd4de68f2f +task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-9 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Do i have any personal calendar events away from home today? + expect_response: + - 'yes' + - dinner + - school + - class +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Do i have any personal calendar events away from home today? + context: + id: 01KHJQWV8FXJACGN7PYJ9JT71W + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:30.223853+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:30.227936+00:00 + - role: user + content: Do i have any personal calendar events away from home today? + attachments: null + created: 2026-02-16 08:08:30.223923+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1f177f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:30.227948+00:00 + duration_ms: 25.609 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-0.yaml new file mode 100644 index 000000000..d88f43f29 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-0.yaml @@ -0,0 +1,226 @@ +--- +uuid: 21e7f56a-8e42-4a7e-9f00-a09ad125690b +task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-0 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: From my personal calendar, how many nights do I need to cook this week? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: From my personal calendar, how many nights do I need to cook this week? + context: + id: 01KHJQX6432Z11004XYPM1TREG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:41.347351+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:41.348898+00:00 + - role: user + content: From my personal calendar, how many nights do I need to cook this + week? + attachments: null + created: 2026-02-16 08:08:41.347430+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4ae94e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:41.348908+00:00 + duration_ms: 25.152 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-1.yaml new file mode 100644 index 000000000..1299a49f5 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-1.yaml @@ -0,0 +1,226 @@ +--- +uuid: b0e32cd8-0a51-4146-8d8a-c9237d41750b +task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-1 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: From my personal calendar, how many nights do I need to cook this week? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: From my personal calendar, how many nights do I need to cook this week? + context: + id: 01KHJQX6A4V6T21Y4BFY5RCSH5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:41.541014+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:41.542560+00:00 + - role: user + content: From my personal calendar, how many nights do I need to cook this + week? + attachments: null + created: 2026-02-16 08:08:41.541085+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1a84bf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:41.542570+00:00 + duration_ms: 23.593 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-2.yaml new file mode 100644 index 000000000..012a5cd32 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-2.yaml @@ -0,0 +1,226 @@ +--- +uuid: 837608bd-6b11-452d-9128-15536456dbd9 +task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-2 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: From my personal calendar, how many nights do I need to cook this week? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: From my personal calendar, how many nights do I need to cook this week? + context: + id: 01KHJQX6GZDTZ3B4A1ZVW0GCZB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:41.760043+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:41.762152+00:00 + - role: user + content: From my personal calendar, how many nights do I need to cook this + week? + attachments: null + created: 2026-02-16 08:08:41.760111+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1bba350>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:41.762163+00:00 + duration_ms: 29.998 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-3.yaml new file mode 100644 index 000000000..5c84d6ab4 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-3.yaml @@ -0,0 +1,226 @@ +--- +uuid: b40c3d13-ec7b-4c82-ae60-61638d76e858 +task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-3 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: From my personal calendar, how many nights do I need to cook this week? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: From my personal calendar, how many nights do I need to cook this week? + context: + id: 01KHJQX6Q9JGMKCF7WPGS0VZ2T + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:41.961452+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:41.965873+00:00 + - role: user + content: From my personal calendar, how many nights do I need to cook this + week? + attachments: null + created: 2026-02-16 08:08:41.961520+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2cd7cc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:41.965884+00:00 + duration_ms: 32.941 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-4.yaml new file mode 100644 index 000000000..769029763 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-4.yaml @@ -0,0 +1,226 @@ +--- +uuid: 2b960f19-e059-4fbc-9734-dd01522a87ea +task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-4 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: From my personal calendar, how many nights do I need to cook this week? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: From my personal calendar, how many nights do I need to cook this week? + context: + id: 01KHJQX6XTHTBSZXP5NYHVWBNK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:42.170528+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:42.172545+00:00 + - role: user + content: From my personal calendar, how many nights do I need to cook this + week? + attachments: null + created: 2026-02-16 08:08:42.170599+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2a90720>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:42.172556+00:00 + duration_ms: 26.159 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-5.yaml new file mode 100644 index 000000000..464203b37 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-5.yaml @@ -0,0 +1,226 @@ +--- +uuid: 13cbaace-d372-446b-aeab-5b33f5535cc2 +task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-5 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: From my personal calendar, how many nights do I need to cook this week? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: From my personal calendar, how many nights do I need to cook this week? + context: + id: 01KHJQX74RSGBNBMMCN589VM82 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:42.392666+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:42.394504+00:00 + - role: user + content: From my personal calendar, how many nights do I need to cook this + week? + attachments: null + created: 2026-02-16 08:08:42.392736+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4e0fb60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:42.394513+00:00 + duration_ms: 28.744 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-6.yaml new file mode 100644 index 000000000..d55d1a008 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-6.yaml @@ -0,0 +1,226 @@ +--- +uuid: 95c0f487-68c2-406b-af87-0772258dba08 +task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-6 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: From my personal calendar, how many nights do I need to cook this week? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: From my personal calendar, how many nights do I need to cook this week? + context: + id: 01KHJQX7B5WS9WBW3PV521WNH2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:42.597530+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:42.600513+00:00 + - role: user + content: From my personal calendar, how many nights do I need to cook this + week? + attachments: null + created: 2026-02-16 08:08:42.597599+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4887270>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:42.600523+00:00 + duration_ms: 25.196 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-7.yaml new file mode 100644 index 000000000..4036ecf2b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-7.yaml @@ -0,0 +1,226 @@ +--- +uuid: d0bb1524-ef5b-4d55-ba82-aa180b8bfba4 +task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-7 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: From my personal calendar, how many nights do I need to cook this week? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: From my personal calendar, how many nights do I need to cook this week? + context: + id: 01KHJQX7HYBFZ5HFMA1DJSGXZJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:42.814144+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:42.817006+00:00 + - role: user + content: From my personal calendar, how many nights do I need to cook this + week? + attachments: null + created: 2026-02-16 08:08:42.814211+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c0f05b10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:42.817016+00:00 + duration_ms: 28.557 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-8.yaml new file mode 100644 index 000000000..6eeaba199 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-8.yaml @@ -0,0 +1,226 @@ +--- +uuid: 7a84c243-6652-4981-9d56-0adad04a52b4 +task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-8 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: From my personal calendar, how many nights do I need to cook this week? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: From my personal calendar, how many nights do I need to cook this week? + context: + id: 01KHJQX7R2920D3ZPJ03ERHJSA + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:43.010527+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:43.050339+00:00 + - role: user + content: From my personal calendar, how many nights do I need to cook this + week? + attachments: null + created: 2026-02-16 08:08:43.010598+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2905430>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:43.050359+00:00 + duration_ms: 83.87 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-9.yaml new file mode 100644 index 000000000..f1e7e2e3d --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-9.yaml @@ -0,0 +1,226 @@ +--- +uuid: 8845ad19-d59b-4386-9956-0b42a6d93474 +task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-9 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: From my personal calendar, how many nights do I need to cook this week? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: From my personal calendar, how many nights do I need to cook this week? + context: + id: 01KHJQX816WMX4DKF6ZZJQNY0J + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:43.302745+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:43.304331+00:00 + - role: user + content: From my personal calendar, how many nights do I need to cook this + week? + attachments: null + created: 2026-02-16 08:08:43.302815+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2fb2ae0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:43.304342+00:00 + duration_ms: 29.065 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-0.yaml new file mode 100644 index 000000000..8a060fe0a --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-0.yaml @@ -0,0 +1,225 @@ +--- +uuid: e079a7bb-33db-4105-91c8-91be2eb8627e +task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-0 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: How many nights this week do I need to cook? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many nights this week do I need to cook? + context: + id: 01KHJQX3GJ59YTQQR6HWJD6R9Q + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:38.674483+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:38.675904+00:00 + - role: user + content: How many nights this week do I need to cook? + attachments: null + created: 2026-02-16 08:08:38.674554+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d42cf110>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:38.675914+00:00 + duration_ms: 22.139 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-1.yaml new file mode 100644 index 000000000..cb5511176 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-1.yaml @@ -0,0 +1,225 @@ +--- +uuid: df1039ce-aea3-420d-86cd-d9b43b6b510d +task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-1 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: How many nights this week do I need to cook? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many nights this week do I need to cook? + context: + id: 01KHJQX3Q92EYR1PKXJWB4PDM4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:38.889511+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:38.893539+00:00 + - role: user + content: How many nights this week do I need to cook? + attachments: null + created: 2026-02-16 08:08:38.889581+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c19d4250>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:38.893550+00:00 + duration_ms: 30.07 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-2.yaml new file mode 100644 index 000000000..e8f057a63 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-2.yaml @@ -0,0 +1,225 @@ +--- +uuid: 65cb2a60-e653-4f1f-9639-ce72181f0d63 +task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-2 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: How many nights this week do I need to cook? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many nights this week do I need to cook? + context: + id: 01KHJQX3XT9AZHPSC7P12YGAPS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:39.098375+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:39.105542+00:00 + - role: user + content: How many nights this week do I need to cook? + attachments: null + created: 2026-02-16 08:08:39.098447+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4c3d6f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:39.105555+00:00 + duration_ms: 44.76 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-3.yaml new file mode 100644 index 000000000..0b6e3879b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-3.yaml @@ -0,0 +1,225 @@ +--- +uuid: f577a3c9-0b81-4860-a623-02c45c01c872 +task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-3 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: How many nights this week do I need to cook? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many nights this week do I need to cook? + context: + id: 01KHJQX459M8RNZ4Z4EYYQJ2PE + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:39.337422+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:39.339574+00:00 + - role: user + content: How many nights this week do I need to cook? + attachments: null + created: 2026-02-16 08:08:39.337497+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1d87ab0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:39.339584+00:00 + duration_ms: 27.52 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-4.yaml new file mode 100644 index 000000000..9a3743912 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-4.yaml @@ -0,0 +1,225 @@ +--- +uuid: 4d1fb45b-cac2-4b19-bd54-1b14fe88e124 +task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-4 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: How many nights this week do I need to cook? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many nights this week do I need to cook? + context: + id: 01KHJQX4BGW3WP6EM635QHQSPJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:39.536192+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:39.539209+00:00 + - role: user + content: How many nights this week do I need to cook? + attachments: null + created: 2026-02-16 08:08:39.536303+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f45d0eb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:39.539221+00:00 + duration_ms: 32.66 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-5.yaml new file mode 100644 index 000000000..dd302e49a --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-5.yaml @@ -0,0 +1,225 @@ +--- +uuid: c6e42117-b944-455f-90e4-2db2273eaae4 +task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-5 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: How many nights this week do I need to cook? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many nights this week do I need to cook? + context: + id: 01KHJQX4TWXPM3V9QPYP5CJFR2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:40.028288+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:40.032033+00:00 + - role: user + content: How many nights this week do I need to cook? + attachments: null + created: 2026-02-16 08:08:40.028367+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1b2ee50>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:40.032043+00:00 + duration_ms: 171.19 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-6.yaml new file mode 100644 index 000000000..b0863e18e --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-6.yaml @@ -0,0 +1,225 @@ +--- +uuid: b70b61bd-69b4-4e40-84ed-2c281ea408cb +task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-6 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: How many nights this week do I need to cook? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many nights this week do I need to cook? + context: + id: 01KHJQX57CCNN333PBSAKDXVV4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:40.428855+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:40.440694+00:00 + - role: user + content: How many nights this week do I need to cook? + attachments: null + created: 2026-02-16 08:08:40.428930+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1d90510>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:40.440712+00:00 + duration_ms: 186.98 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-7.yaml new file mode 100644 index 000000000..e500a345b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-7.yaml @@ -0,0 +1,225 @@ +--- +uuid: a084eba5-0969-4d62-9aa1-2e8aea6d44a1 +task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-7 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: How many nights this week do I need to cook? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many nights this week do I need to cook? + context: + id: 01KHJQX5JBQTGZD2NKNT2QVNT0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:40.779319+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:40.780862+00:00 + - role: user + content: How many nights this week do I need to cook? + attachments: null + created: 2026-02-16 08:08:40.779390+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2951bc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:40.780871+00:00 + duration_ms: 23.64 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-8.yaml new file mode 100644 index 000000000..a342b8daa --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-8.yaml @@ -0,0 +1,225 @@ +--- +uuid: 52e491a4-b268-43c6-976d-6ed4ab912d6d +task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-8 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: How many nights this week do I need to cook? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many nights this week do I need to cook? + context: + id: 01KHJQX5R05MHF5RCJC9K76DE4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:40.960474+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:40.962010+00:00 + - role: user + content: How many nights this week do I need to cook? + attachments: null + created: 2026-02-16 08:08:40.960551+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2be3950>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:40.962020+00:00 + duration_ms: 26.421 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-9.yaml new file mode 100644 index 000000000..eda7b7ab8 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-9.yaml @@ -0,0 +1,225 @@ +--- +uuid: 1a847c99-0e14-414e-8862-cb4a9dc5a7b9 +task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-9 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: How many nights this week do I need to cook? + expect_response: + - '3' + - three +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many nights this week do I need to cook? + context: + id: 01KHJQX5XWPYV9R02FH7ARV8FT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:41.149083+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:41.150642+00:00 + - role: user + content: How many nights this week do I need to cook? + attachments: null + created: 2026-02-16 08:08:41.149153+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c253eda0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:41.150652+00:00 + duration_ms: 22.775 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-0.yaml new file mode 100644 index 000000000..e37170512 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-0.yaml @@ -0,0 +1,224 @@ +--- +uuid: 07442f84-60d1-4b39-a5e4-46e93f5415e8 +task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-0 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Is anyone coming to visit? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is anyone coming to visit? + context: + id: 01KHJQX87W5PKTSFZ2JYDGMWPT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:43.516749+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:43.518276+00:00 + - role: user + content: Is anyone coming to visit? + attachments: null + created: 2026-02-16 08:08:43.516818+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2c526c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:43.518286+00:00 + duration_ms: 32.096 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-1.yaml new file mode 100644 index 000000000..3c10d3fc3 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-1.yaml @@ -0,0 +1,224 @@ +--- +uuid: d79e5be2-f958-4f2b-a50c-66a2c253e4b4 +task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-1 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Is anyone coming to visit? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is anyone coming to visit? + context: + id: 01KHJQX8E7PA7GT9S8DGTKRM6M + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:43.719519+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:43.722087+00:00 + - role: user + content: Is anyone coming to visit? + attachments: null + created: 2026-02-16 08:08:43.719587+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2643a00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:43.722097+00:00 + duration_ms: 24.015 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-2.yaml new file mode 100644 index 000000000..6f2bd5825 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-2.yaml @@ -0,0 +1,224 @@ +--- +uuid: 020805d2-8bc5-45ca-91e0-72eaf71418c8 +task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-2 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Is anyone coming to visit? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is anyone coming to visit? + context: + id: 01KHJQX8MC8V8ZYKSFQ3N2VCQV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:43.916718+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:43.918606+00:00 + - role: user + content: Is anyone coming to visit? + attachments: null + created: 2026-02-16 08:08:43.916785+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d44c6b90>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:43.918617+00:00 + duration_ms: 23.889 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-3.yaml new file mode 100644 index 000000000..4ba15c757 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-3.yaml @@ -0,0 +1,224 @@ +--- +uuid: d5fc06a5-c48f-426b-adde-a7aec0830115 +task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-3 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Is anyone coming to visit? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is anyone coming to visit? + context: + id: 01KHJQX8TVJC0CDXP11TBH0JJ9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:44.123319+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:44.127747+00:00 + - role: user + content: Is anyone coming to visit? + attachments: null + created: 2026-02-16 08:08:44.123389+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1bac930>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:44.127758+00:00 + duration_ms: 30.004 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-4.yaml new file mode 100644 index 000000000..53b7effd3 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-4.yaml @@ -0,0 +1,224 @@ +--- +uuid: 9e3bf5b0-0f99-4807-a2b9-8a5ffad32a67 +task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-4 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Is anyone coming to visit? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is anyone coming to visit? + context: + id: 01KHJQX91B1GG8BSGHXW27YW69 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:44.331585+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:44.336073+00:00 + - role: user + content: Is anyone coming to visit? + attachments: null + created: 2026-02-16 08:08:44.331657+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c28901a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:44.336085+00:00 + duration_ms: 171.132 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-5.yaml new file mode 100644 index 000000000..c9b4cde08 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-5.yaml @@ -0,0 +1,224 @@ +--- +uuid: 313717a5-fbe2-4c00-93ad-796158f516ea +task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-5 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Is anyone coming to visit? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is anyone coming to visit? + context: + id: 01KHJQX9CMJ33ZHWEJ6Z9HPAYV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:44.692771+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:44.695969+00:00 + - role: user + content: Is anyone coming to visit? + attachments: null + created: 2026-02-16 08:08:44.692838+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2c7c720>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:44.695979+00:00 + duration_ms: 33.05 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-6.yaml new file mode 100644 index 000000000..a7bd537c7 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-6.yaml @@ -0,0 +1,224 @@ +--- +uuid: 77967c17-ca3e-4351-991e-0f7437e9e4be +task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-6 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Is anyone coming to visit? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is anyone coming to visit? + context: + id: 01KHJQX9K6JMSRCHZGS3H4PHK1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:44.902385+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:44.904179+00:00 + - role: user + content: Is anyone coming to visit? + attachments: null + created: 2026-02-16 08:08:44.902453+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1734b40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:44.904188+00:00 + duration_ms: 26.38 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-7.yaml new file mode 100644 index 000000000..72f4a247a --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-7.yaml @@ -0,0 +1,224 @@ +--- +uuid: 3fd59680-180e-4fad-997d-c8640e95378b +task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-7 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Is anyone coming to visit? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is anyone coming to visit? + context: + id: 01KHJQX9SQE12CG430PPD8V2FC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:45.111724+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:45.113601+00:00 + - role: user + content: Is anyone coming to visit? + attachments: null + created: 2026-02-16 08:08:45.111794+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f48bc250>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:45.113611+00:00 + duration_ms: 24.406 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-8.yaml new file mode 100644 index 000000000..5201bd07f --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-8.yaml @@ -0,0 +1,224 @@ +--- +uuid: f12245cf-26cc-4d55-a67b-f4a191b8b0b6 +task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-8 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Is anyone coming to visit? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is anyone coming to visit? + context: + id: 01KHJQXA0D4Y1ZJYW8WK698XSP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:45.325200+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:45.327126+00:00 + - role: user + content: Is anyone coming to visit? + attachments: null + created: 2026-02-16 08:08:45.325294+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f462ac40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:45.327136+00:00 + duration_ms: 29.49 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-9.yaml new file mode 100644 index 000000000..a5c124f30 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-is_anyone_coming_to_visit-9.yaml @@ -0,0 +1,224 @@ +--- +uuid: 30752e8a-92da-4b1d-91a2-eae78eaad4fd +task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-9 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Is anyone coming to visit? + expect_response: + - liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is anyone coming to visit? + context: + id: 01KHJQXA7WPV3HDR0YN2963FZS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:45.564172+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:45.575662+00:00 + - role: user + content: Is anyone coming to visit? + attachments: null + created: 2026-02-16 08:08:45.564270+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1834880>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:45.575678+00:00 + duration_ms: 32.46 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-0.yaml new file mode 100644 index 000000000..ce28843a4 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-0.yaml @@ -0,0 +1,224 @@ +--- +uuid: 7dd40cb5-aaf7-4022-9b85-9c8da4d5589c +task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-0 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes are on my personal calendar today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes are on my personal calendar today? + context: + id: 01KHJQWDWBDNAR9PYC3TQFN2Y6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:16.523216+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:16.524806+00:00 + - role: user + content: What classes are on my personal calendar today? + attachments: null + created: 2026-02-16 08:08:16.523314+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1ad9dd0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:16.524817+00:00 + duration_ms: 23.502 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-1.yaml new file mode 100644 index 000000000..7c05c5c30 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-1.yaml @@ -0,0 +1,224 @@ +--- +uuid: 57109ae9-44e1-41e5-a9ed-1935333cf671 +task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-1 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes are on my personal calendar today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes are on my personal calendar today? + context: + id: 01KHJQWE3N500JCDVZN98Y4FXS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:16.757450+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:16.758969+00:00 + - role: user + content: What classes are on my personal calendar today? + attachments: null + created: 2026-02-16 08:08:16.757526+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c187cca0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:16.758978+00:00 + duration_ms: 323.483 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-2.yaml new file mode 100644 index 000000000..4d7231349 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-2.yaml @@ -0,0 +1,224 @@ +--- +uuid: 624e7ea0-bd01-4bfa-beaa-8ffd484f64b6 +task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-2 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes are on my personal calendar today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes are on my personal calendar today? + context: + id: 01KHJQWEKFC7GA3CA0TGNW6PF6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:17.263231+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:17.264802+00:00 + - role: user + content: What classes are on my personal calendar today? + attachments: null + created: 2026-02-16 08:08:17.263342+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1da8eb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:17.264812+00:00 + duration_ms: 25.338 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-3.yaml new file mode 100644 index 000000000..020101405 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-3.yaml @@ -0,0 +1,224 @@ +--- +uuid: 926fbaf3-5c26-4fcb-b1c9-935266d4f78d +task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-3 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes are on my personal calendar today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes are on my personal calendar today? + context: + id: 01KHJQWETSKN70KMPVRHJ9NM8E + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:17.498031+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:17.499869+00:00 + - role: user + content: What classes are on my personal calendar today? + attachments: null + created: 2026-02-16 08:08:17.498102+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c19287d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:17.499879+00:00 + duration_ms: 29.001 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-4.yaml new file mode 100644 index 000000000..371aeb7bc --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-4.yaml @@ -0,0 +1,224 @@ +--- +uuid: eee2aa9f-2237-4a8c-8eb4-ffb1d21bb848 +task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-4 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes are on my personal calendar today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes are on my personal calendar today? + context: + id: 01KHJQWF15FHWH69W84FHP13JT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:17.701483+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:17.702955+00:00 + - role: user + content: What classes are on my personal calendar today? + attachments: null + created: 2026-02-16 08:08:17.701552+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f47bdbc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:17.702965+00:00 + duration_ms: 24.855 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-5.yaml new file mode 100644 index 000000000..434560626 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-5.yaml @@ -0,0 +1,224 @@ +--- +uuid: 8575f705-cacb-4c31-8a2f-9cea48612335 +task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-5 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes are on my personal calendar today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes are on my personal calendar today? + context: + id: 01KHJQWF7JSZM7YSSQF6Q5MGZX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:17.906613+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:17.908527+00:00 + - role: user + content: What classes are on my personal calendar today? + attachments: null + created: 2026-02-16 08:08:17.906685+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4b66da0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:17.908537+00:00 + duration_ms: 27.85 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-6.yaml new file mode 100644 index 000000000..06675ce98 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-6.yaml @@ -0,0 +1,224 @@ +--- +uuid: 342e6de0-784b-470b-b020-85206dd3180c +task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-6 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes are on my personal calendar today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes are on my personal calendar today? + context: + id: 01KHJQWFDYFE2NXEKAZTB4FGG2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:18.111075+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:18.112509+00:00 + - role: user + content: What classes are on my personal calendar today? + attachments: null + created: 2026-02-16 08:08:18.111143+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f48ca400>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:18.112519+00:00 + duration_ms: 25.536 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-7.yaml new file mode 100644 index 000000000..0ac53825b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-7.yaml @@ -0,0 +1,224 @@ +--- +uuid: 6002fe2e-3b0c-4cff-b22e-c7006871c179 +task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-7 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes are on my personal calendar today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes are on my personal calendar today? + context: + id: 01KHJQWFMQMARKJZ594C5TPZ0P + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:18.327413+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:18.330557+00:00 + - role: user + content: What classes are on my personal calendar today? + attachments: null + created: 2026-02-16 08:08:18.327484+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c0f085c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:18.330568+00:00 + duration_ms: 31.073 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-8.yaml new file mode 100644 index 000000000..5b33c21ed --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-8.yaml @@ -0,0 +1,224 @@ +--- +uuid: e7f5c15c-c1e0-4b15-9a18-b5789fa703e4 +task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-8 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes are on my personal calendar today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes are on my personal calendar today? + context: + id: 01KHJQWFVH093EN172DM3TSMN4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:18.545996+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:18.548710+00:00 + - role: user + content: What classes are on my personal calendar today? + attachments: null + created: 2026-02-16 08:08:18.546066+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4dba610>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:18.548721+00:00 + duration_ms: 32.017 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-9.yaml new file mode 100644 index 000000000..3d89b7d93 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-9.yaml @@ -0,0 +1,224 @@ +--- +uuid: ee3ad87b-8418-4739-9fa5-67c99df1333b +task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-9 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes are on my personal calendar today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes are on my personal calendar today? + context: + id: 01KHJQWG3H6TDE65PEQ3XA7KZ1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:18.801723+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:18.803275+00:00 + - role: user + content: What classes are on my personal calendar today? + attachments: null + created: 2026-02-16 08:08:18.801796+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4de5fe0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:18.803285+00:00 + duration_ms: 23.474 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-0.yaml new file mode 100644 index 000000000..33fdfbe28 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-0.yaml @@ -0,0 +1,224 @@ +--- +uuid: 11a0423c-8919-49bb-8dff-e171cc5b9ea0 +task_id: urban_loft_au_calendar-what_classes_do_i_have_today-0 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes do I have today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes do I have today? + context: + id: 01KHJQWBFC5Q7W00SD7VHPZVYF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:14.060374+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:14.062374+00:00 + - role: user + content: What classes do I have today? + attachments: null + created: 2026-02-16 08:08:14.060444+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f44d8b40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:14.062384+00:00 + duration_ms: 178.144 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-1.yaml new file mode 100644 index 000000000..2f3a018b0 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-1.yaml @@ -0,0 +1,224 @@ +--- +uuid: 068e73d7-d20d-41b0-833a-9633c12d85e0 +task_id: urban_loft_au_calendar-what_classes_do_i_have_today-1 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes do I have today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes do I have today? + context: + id: 01KHJQWBVAKWHY957JRNY2WBEP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:14.442643+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:14.444224+00:00 + - role: user + content: What classes do I have today? + attachments: null + created: 2026-02-16 08:08:14.442715+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4883ab0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:14.444235+00:00 + duration_ms: 24.814 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-2.yaml new file mode 100644 index 000000000..13288bfc6 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-2.yaml @@ -0,0 +1,224 @@ +--- +uuid: 416ffe09-5699-4943-be03-df0c2f48264e +task_id: urban_loft_au_calendar-what_classes_do_i_have_today-2 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes do I have today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes do I have today? + context: + id: 01KHJQWC2G264T7EAM43P7K7HV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:14.672382+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:14.673906+00:00 + - role: user + content: What classes do I have today? + attachments: null + created: 2026-02-16 08:08:14.672455+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1d637f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:14.673916+00:00 + duration_ms: 30.304 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-3.yaml new file mode 100644 index 000000000..613d0bf6d --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-3.yaml @@ -0,0 +1,224 @@ +--- +uuid: 475cf93e-c855-4193-963b-149d25d98284 +task_id: urban_loft_au_calendar-what_classes_do_i_have_today-3 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes do I have today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes do I have today? + context: + id: 01KHJQWC8RACECMRQXN6S5CYEW + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:14.873091+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:14.874589+00:00 + - role: user + content: What classes do I have today? + attachments: null + created: 2026-02-16 08:08:14.873161+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1b31a60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:14.874599+00:00 + duration_ms: 24.827 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-4.yaml new file mode 100644 index 000000000..a17875658 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-4.yaml @@ -0,0 +1,224 @@ +--- +uuid: 3f98326a-435a-4014-b20e-29f4a8032e0a +task_id: urban_loft_au_calendar-what_classes_do_i_have_today-4 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes do I have today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes do I have today? + context: + id: 01KHJQWCF63NC9ECZPJZ6W7SVH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:15.078331+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:15.079753+00:00 + - role: user + content: What classes do I have today? + attachments: null + created: 2026-02-16 08:08:15.078401+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1be7a00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:15.079763+00:00 + duration_ms: 26.341 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-5.yaml new file mode 100644 index 000000000..3856ff035 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-5.yaml @@ -0,0 +1,224 @@ +--- +uuid: 3ba4d2dc-7857-4d76-b84b-a42b0b45764f +task_id: urban_loft_au_calendar-what_classes_do_i_have_today-5 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes do I have today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes do I have today? + context: + id: 01KHJQWCNRSH3G0V1VGF5GQEGG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:15.288774+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:15.290719+00:00 + - role: user + content: What classes do I have today? + attachments: null + created: 2026-02-16 08:08:15.288845+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4b15dd0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:15.290729+00:00 + duration_ms: 28.647 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-6.yaml new file mode 100644 index 000000000..919027287 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-6.yaml @@ -0,0 +1,224 @@ +--- +uuid: fc9b4492-97b9-499b-aaca-56ae681937cf +task_id: urban_loft_au_calendar-what_classes_do_i_have_today-6 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes do I have today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes do I have today? + context: + id: 01KHJQWCWF9E53BZ1MXN0KHRRD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:15.503660+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:15.505718+00:00 + - role: user + content: What classes do I have today? + attachments: null + created: 2026-02-16 08:08:15.503733+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1e0c720>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:15.505728+00:00 + duration_ms: 24.262 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-7.yaml new file mode 100644 index 000000000..856334c91 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-7.yaml @@ -0,0 +1,224 @@ +--- +uuid: ea070fd1-769d-4f8a-bda2-f9668fb4e4a9 +task_id: urban_loft_au_calendar-what_classes_do_i_have_today-7 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes do I have today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes do I have today? + context: + id: 01KHJQWD32V90DGVYPQ0V081SQ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:15.714534+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:15.718757+00:00 + - role: user + content: What classes do I have today? + attachments: null + created: 2026-02-16 08:08:15.714605+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f461ba00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:15.718768+00:00 + duration_ms: 34.465 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-8.yaml new file mode 100644 index 000000000..4d9c9228a --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-8.yaml @@ -0,0 +1,224 @@ +--- +uuid: 8d00722e-fa6a-4930-a936-5a90442856b4 +task_id: urban_loft_au_calendar-what_classes_do_i_have_today-8 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes do I have today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes do I have today? + context: + id: 01KHJQWDAYZTCYJEN2VF2S87FH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:15.967083+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:15.978533+00:00 + - role: user + content: What classes do I have today? + attachments: null + created: 2026-02-16 08:08:15.967157+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4f68300>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:15.978548+00:00 + duration_ms: 187.946 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-9.yaml new file mode 100644 index 000000000..07ba190a7 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-what_classes_do_i_have_today-9.yaml @@ -0,0 +1,224 @@ +--- +uuid: f3f1b907-b660-4058-b728-541740e2bf2a +task_id: urban_loft_au_calendar-what_classes_do_i_have_today-9 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: What classes do I have today? + expect_response: + - chemistry +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What classes do I have today? + context: + id: 01KHJQWDP6VKJXFKZYEZK37X65 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:16.326183+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:16.327774+00:00 + - role: user + content: What classes do I have today? + attachments: null + created: 2026-02-16 08:08:16.326283+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f50b8bf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:16.327784+00:00 + duration_ms: 32.786 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-0.yaml new file mode 100644 index 000000000..9c54e5852 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-0.yaml @@ -0,0 +1,224 @@ +--- +uuid: 684be5bf-46a9-445f-8dfc-7936e83ab75c +task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-0 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who am I meeting for dinner? + context: + id: 01KHJQWG9MD1E7BT9BRRGQJ0SM + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:18.996761+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:18.998258+00:00 + - role: user + content: Who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:18.996831+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c17361f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:18.998268+00:00 + duration_ms: 22.211 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-1.yaml new file mode 100644 index 000000000..f0abce5e7 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-1.yaml @@ -0,0 +1,224 @@ +--- +uuid: 838ec018-7927-408d-aeb2-e7acc7c918d9 +task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-1 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who am I meeting for dinner? + context: + id: 01KHJQWGFNJZ5NY9B7QNQ4CGE2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:19.189595+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:19.191382+00:00 + - role: user + content: Who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:19.189666+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c16f9640>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:19.191393+00:00 + duration_ms: 42.864 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-2.yaml new file mode 100644 index 000000000..93321c42c --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-2.yaml @@ -0,0 +1,224 @@ +--- +uuid: a627b110-4c1a-4bf3-b836-2d4bc87b736d +task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-2 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who am I meeting for dinner? + context: + id: 01KHJQWGPARQBT39G2934MZK4W + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:19.402821+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:19.404645+00:00 + - role: user + content: Who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:19.402891+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1e72da0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:19.404654+00:00 + duration_ms: 25.698 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-3.yaml new file mode 100644 index 000000000..b9d323da1 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-3.yaml @@ -0,0 +1,224 @@ +--- +uuid: 4c9fa456-6c16-448f-802b-b12b5543adb2 +task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-3 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who am I meeting for dinner? + context: + id: 01KHJQWGWSJQMAM1CRHD59HR7T + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:19.609731+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:19.611532+00:00 + - role: user + content: Who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:19.609801+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4633320>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:19.611542+00:00 + duration_ms: 32.709 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-4.yaml new file mode 100644 index 000000000..89905f598 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-4.yaml @@ -0,0 +1,224 @@ +--- +uuid: fe1044ec-c324-4ffc-8c8e-b3bfb40a0268 +task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-4 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who am I meeting for dinner? + context: + id: 01KHJQWH3K5HQSV7X2NBYN9MGM + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:19.827752+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:19.830038+00:00 + - role: user + content: Who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:19.827822+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4dfd380>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:19.830048+00:00 + duration_ms: 171.098 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-5.yaml new file mode 100644 index 000000000..8e01a2eb4 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-5.yaml @@ -0,0 +1,224 @@ +--- +uuid: 41f90f11-594a-40de-9110-f878de44c095 +task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-5 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who am I meeting for dinner? + context: + id: 01KHJQWHF4BJZ72B8Z1A79TM9V + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:20.197025+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:20.198864+00:00 + - role: user + content: Who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:20.197096+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c0e6e6c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:20.198874+00:00 + duration_ms: 23.921 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-6.yaml new file mode 100644 index 000000000..f3e0cc6e4 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-6.yaml @@ -0,0 +1,224 @@ +--- +uuid: 72d343bf-7ea2-48c6-86bd-d5104f719c9b +task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-6 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who am I meeting for dinner? + context: + id: 01KHJQWHN21CQ1FN2PV1FZTB83 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:20.386812+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:20.389000+00:00 + - role: user + content: Who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:20.386881+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1fcb950>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:20.389010+00:00 + duration_ms: 170.985 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-7.yaml new file mode 100644 index 000000000..70ee35733 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-7.yaml @@ -0,0 +1,224 @@ +--- +uuid: 02701f9c-c2a4-4d68-ba3c-1088e1c853e8 +task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-7 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who am I meeting for dinner? + context: + id: 01KHJQWJ1SGKT1W2Q1T4JEYYR3 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:20.793345+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:20.794946+00:00 + - role: user + content: Who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:20.793417+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1d635e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:20.794955+00:00 + duration_ms: 194.586 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-8.yaml new file mode 100644 index 000000000..9387a6dfc --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-8.yaml @@ -0,0 +1,224 @@ +--- +uuid: c0ee26ac-fc6f-45fc-b46f-2671581b4169 +task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-8 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who am I meeting for dinner? + context: + id: 01KHJQWJDPC14XT2X9ZM4W8VDT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:21.174925+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:21.176530+00:00 + - role: user + content: Who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:21.174994+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f468bcc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:21.176540+00:00 + duration_ms: 166.754 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-9.yaml new file mode 100644 index 000000000..241ae3e29 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_calendar-who_am_i_meeting_for_dinner-9.yaml @@ -0,0 +1,224 @@ +--- +uuid: d6e8c61b-e76e-4477-b156-316bfdb441a7 +task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-9 +model_id: gemma-3-27b-it +category: +- calendar +- question +task: + input_text: Who am I meeting for dinner? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who am I meeting for dinner? + context: + id: 01KHJQWJR40N3ZEW2X1QNCG5HF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:21.508590+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:21.516214+00:00 + - role: user + content: Who am I meeting for dinner? + attachments: null + created: 2026-02-16 08:08:21.508661+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2f12c40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:21.516225+00:00 + duration_ms: 175.872 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-0.yaml new file mode 100644 index 000000000..8b64a1302 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-0.yaml @@ -0,0 +1,227 @@ +--- +uuid: f40a725b-8889-4aba-8810-1354ed622874 +task_id: urban_loft_au_light-how_many_lights_are_currently_on-0 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: How many lights are currently on? + expect_response: + - '2' + - two + - bedroom + - balcony +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many lights are currently on? + context: + id: 01KHJQXCDTFTV1VFQCMX15AK2M + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:47.802625+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:47.805665+00:00 + - role: user + content: How many lights are currently on? + attachments: null + created: 2026-02-16 08:08:47.802695+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1e7c720>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:47.805675+00:00 + duration_ms: 25.629 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-1.yaml new file mode 100644 index 000000000..90f18872d --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-1.yaml @@ -0,0 +1,227 @@ +--- +uuid: 754a823b-c053-455e-b084-79460a033157 +task_id: urban_loft_au_light-how_many_lights_are_currently_on-1 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: How many lights are currently on? + expect_response: + - '2' + - two + - bedroom + - balcony +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many lights are currently on? + context: + id: 01KHJQXCKZ4W6MA12PB6KYYA7B + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:47.999224+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:48.002490+00:00 + - role: user + content: How many lights are currently on? + attachments: null + created: 2026-02-16 08:08:47.999320+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d4485d20>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:48.002500+00:00 + duration_ms: 173.832 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-2.yaml new file mode 100644 index 000000000..08c15c9fb --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-2.yaml @@ -0,0 +1,227 @@ +--- +uuid: aec04023-e68a-4438-bca8-a1ffcacd915c +task_id: urban_loft_au_light-how_many_lights_are_currently_on-2 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: How many lights are currently on? + expect_response: + - '2' + - two + - bedroom + - balcony +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many lights are currently on? + context: + id: 01KHJQXCZMF91YJMP483H3BK1T + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:48.372734+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:48.374310+00:00 + - role: user + content: How many lights are currently on? + attachments: null + created: 2026-02-16 08:08:48.372805+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3f91bc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:48.374320+00:00 + duration_ms: 24.107 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-3.yaml new file mode 100644 index 000000000..fb47b8c91 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-3.yaml @@ -0,0 +1,227 @@ +--- +uuid: 3e6a27c3-5a8e-46c1-b810-d16c38a186f0 +task_id: urban_loft_au_light-how_many_lights_are_currently_on-3 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: How many lights are currently on? + expect_response: + - '2' + - two + - bedroom + - balcony +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many lights are currently on? + context: + id: 01KHJQXD61AX0ZEAFJ1V0XT6GA + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:48.578071+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:48.579642+00:00 + - role: user + content: How many lights are currently on? + attachments: null + created: 2026-02-16 08:08:48.578143+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1a85f30>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:48.579652+00:00 + duration_ms: 165.841 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-4.yaml new file mode 100644 index 000000000..86ed91f69 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-4.yaml @@ -0,0 +1,227 @@ +--- +uuid: 39bf92b0-5b0e-4ca5-b44e-bee09669e5b1 +task_id: urban_loft_au_light-how_many_lights_are_currently_on-4 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: How many lights are currently on? + expect_response: + - '2' + - two + - bedroom + - balcony +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many lights are currently on? + context: + id: 01KHJQXDGYFQTHP6V1V4RGDTK0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:48.927066+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:48.928537+00:00 + - role: user + content: How many lights are currently on? + attachments: null + created: 2026-02-16 08:08:48.927138+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3fcc880>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:48.928546+00:00 + duration_ms: 28.438 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-5.yaml new file mode 100644 index 000000000..7148149a8 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-5.yaml @@ -0,0 +1,227 @@ +--- +uuid: 48eb9960-3dcf-4b03-ad39-9855563ad9ab +task_id: urban_loft_au_light-how_many_lights_are_currently_on-5 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: How many lights are currently on? + expect_response: + - '2' + - two + - bedroom + - balcony +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many lights are currently on? + context: + id: 01KHJQXDQS3BHYFHAVB2P0KPEN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:49.145690+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:49.147505+00:00 + - role: user + content: How many lights are currently on? + attachments: null + created: 2026-02-16 08:08:49.145759+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3c8f060>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:49.147515+00:00 + duration_ms: 35.538 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-6.yaml new file mode 100644 index 000000000..c19530989 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-6.yaml @@ -0,0 +1,227 @@ +--- +uuid: 1ea0dfb0-0bbe-40e5-ab2e-65def6968519 +task_id: urban_loft_au_light-how_many_lights_are_currently_on-6 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: How many lights are currently on? + expect_response: + - '2' + - two + - bedroom + - balcony +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many lights are currently on? + context: + id: 01KHJQXDYX2QS41E54CZPKT8F3 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:49.373860+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:49.377977+00:00 + - role: user + content: How many lights are currently on? + attachments: null + created: 2026-02-16 08:08:49.373929+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4a6a140>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:49.377988+00:00 + duration_ms: 30.688 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-7.yaml new file mode 100644 index 000000000..cf26015cd --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-7.yaml @@ -0,0 +1,227 @@ +--- +uuid: 44420970-2049-4e23-a036-5820659c947c +task_id: urban_loft_au_light-how_many_lights_are_currently_on-7 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: How many lights are currently on? + expect_response: + - '2' + - two + - bedroom + - balcony +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many lights are currently on? + context: + id: 01KHJQXE5GCV4B2AV3ZM3ETRF2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:49.584281+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:49.585814+00:00 + - role: user + content: How many lights are currently on? + attachments: null + created: 2026-02-16 08:08:49.584354+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1853c10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:49.585823+00:00 + duration_ms: 31.133 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-8.yaml new file mode 100644 index 000000000..0c22ff193 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-8.yaml @@ -0,0 +1,227 @@ +--- +uuid: 9d3a7628-b633-4ed8-a857-e40610c3c4ea +task_id: urban_loft_au_light-how_many_lights_are_currently_on-8 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: How many lights are currently on? + expect_response: + - '2' + - two + - bedroom + - balcony +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many lights are currently on? + context: + id: 01KHJQXEBVVGCTHV86WS2WFEQZ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:49.787443+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:49.789299+00:00 + - role: user + content: How many lights are currently on? + attachments: null + created: 2026-02-16 08:08:49.787511+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c26f1d20>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:49.789309+00:00 + duration_ms: 22.205 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-9.yaml new file mode 100644 index 000000000..a1370186b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-how_many_lights_are_currently_on-9.yaml @@ -0,0 +1,227 @@ +--- +uuid: 15f88efa-4a66-486c-bba8-5338a58d6b55 +task_id: urban_loft_au_light-how_many_lights_are_currently_on-9 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: How many lights are currently on? + expect_response: + - '2' + - two + - bedroom + - balcony +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many lights are currently on? + context: + id: 01KHJQXEJ26E9SETNQQZ3EJ7A5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:49.986625+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:49.988050+00:00 + - role: user + content: How many lights are currently on? + attachments: null + created: 2026-02-16 08:08:49.986692+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f459d7a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:49.988059+00:00 + duration_ms: 167.461 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-0.yaml new file mode 100644 index 000000000..8af05904a --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-0.yaml @@ -0,0 +1,225 @@ +--- +uuid: e397a505-a34d-46d1-b052-880d036139c6 +task_id: urban_loft_au_light-is_the_bedroom_light_off-0 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? + expect_response: + - 'no' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? + context: + id: 01KHJQXH96EPD5AWEEGG0A3T81 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:52.774889+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:52.776782+00:00 + - role: user + content: Is the bedroom light off? + attachments: null + created: 2026-02-16 08:08:52.774958+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f47b4510>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:52.776792+00:00 + duration_ms: 29.985 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-1.yaml new file mode 100644 index 000000000..97be545f5 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-1.yaml @@ -0,0 +1,225 @@ +--- +uuid: ea23d384-8852-44f1-b2eb-6d4b43851e51 +task_id: urban_loft_au_light-is_the_bedroom_light_off-1 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? + expect_response: + - 'no' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? + context: + id: 01KHJQXHFXV2B83K4PB062VP7X + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:52.989731+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:52.991267+00:00 + - role: user + content: Is the bedroom light off? + attachments: null + created: 2026-02-16 08:08:52.989802+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1b2fcc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:52.991276+00:00 + duration_ms: 22.898 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-2.yaml new file mode 100644 index 000000000..a81abb316 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-2.yaml @@ -0,0 +1,225 @@ +--- +uuid: 8fc1a27b-a2a4-411e-bdb4-c8ce241af63a +task_id: urban_loft_au_light-is_the_bedroom_light_off-2 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? + expect_response: + - 'no' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? + context: + id: 01KHJQXHPKNENKA98EAW7CK6V0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:53.204031+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:53.205620+00:00 + - role: user + content: Is the bedroom light off? + attachments: null + created: 2026-02-16 08:08:53.204103+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1e7e610>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:53.205630+00:00 + duration_ms: 27.919 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-3.yaml new file mode 100644 index 000000000..b60be4531 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-3.yaml @@ -0,0 +1,225 @@ +--- +uuid: 2c095b67-19b4-420b-a323-97b844e4cf4e +task_id: urban_loft_au_light-is_the_bedroom_light_off-3 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? + expect_response: + - 'no' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? + context: + id: 01KHJQXHX7E6V0PFK72CFM29R5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:53.415305+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:53.416885+00:00 + - role: user + content: Is the bedroom light off? + attachments: null + created: 2026-02-16 08:08:53.415400+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1bdb5e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:53.416894+00:00 + duration_ms: 22.83 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-4.yaml new file mode 100644 index 000000000..3adf28a3e --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-4.yaml @@ -0,0 +1,225 @@ +--- +uuid: fefeeb35-99e6-420c-af78-9c0839899986 +task_id: urban_loft_au_light-is_the_bedroom_light_off-4 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? + expect_response: + - 'no' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? + context: + id: 01KHJQXJ38GX65Q05AJVC8MJYM + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:53.608925+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:53.650553+00:00 + - role: user + content: Is the bedroom light off? + attachments: null + created: 2026-02-16 08:08:53.608996+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1cd52d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:53.650571+00:00 + duration_ms: 63.721 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-5.yaml new file mode 100644 index 000000000..33aad0676 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-5.yaml @@ -0,0 +1,225 @@ +--- +uuid: bc6dfcfc-a66b-4c02-8b62-bf40dee76f71 +task_id: urban_loft_au_light-is_the_bedroom_light_off-5 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? + expect_response: + - 'no' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? + context: + id: 01KHJQXJAYJ081ZP7XBEFDGA9X + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:53.854712+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:53.856316+00:00 + - role: user + content: Is the bedroom light off? + attachments: null + created: 2026-02-16 08:08:53.854794+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2f11f30>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:53.856325+00:00 + duration_ms: 24.868 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-6.yaml new file mode 100644 index 000000000..6d4986e4b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-6.yaml @@ -0,0 +1,225 @@ +--- +uuid: 937b61e0-9398-42ee-865e-d523a748bfa3 +task_id: urban_loft_au_light-is_the_bedroom_light_off-6 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? + expect_response: + - 'no' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? + context: + id: 01KHJQXJHA6ZZ3DAP618EXKKZQ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:54.058727+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:54.060182+00:00 + - role: user + content: Is the bedroom light off? + attachments: null + created: 2026-02-16 08:08:54.058797+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3d50930>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:54.060192+00:00 + duration_ms: 30.577 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-7.yaml new file mode 100644 index 000000000..4c91dee92 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-7.yaml @@ -0,0 +1,225 @@ +--- +uuid: 978373d2-c29c-4154-892a-c3dc529e0e5d +task_id: urban_loft_au_light-is_the_bedroom_light_off-7 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? + expect_response: + - 'no' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? + context: + id: 01KHJQXJQPJY01CRZXX3MBFJQ8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:54.262702+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:54.264211+00:00 + - role: user + content: Is the bedroom light off? + attachments: null + created: 2026-02-16 08:08:54.262770+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2cd7c10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:54.264220+00:00 + duration_ms: 21.409 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-8.yaml new file mode 100644 index 000000000..6f07c1d02 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-8.yaml @@ -0,0 +1,225 @@ +--- +uuid: 2eb984f6-58dc-4c23-9974-0599cab30b07 +task_id: urban_loft_au_light-is_the_bedroom_light_off-8 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? + expect_response: + - 'no' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? + context: + id: 01KHJQXJYXTQA9E0X0FAX232NH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:54.493645+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:54.495602+00:00 + - role: user + content: Is the bedroom light off? + attachments: null + created: 2026-02-16 08:08:54.493716+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f45641a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:54.495612+00:00 + duration_ms: 171.697 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-9.yaml new file mode 100644 index 000000000..3900e628e --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off-9.yaml @@ -0,0 +1,225 @@ +--- +uuid: 4b9ad0ad-83c4-4874-9614-6faef5b032fa +task_id: urban_loft_au_light-is_the_bedroom_light_off-9 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? + expect_response: + - 'no' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? + context: + id: 01KHJQXKA4D6M72ZM2922EPZ1T + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:54.852498+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:54.855021+00:00 + - role: user + content: Is the bedroom light off? + attachments: null + created: 2026-02-16 08:08:54.852571+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3c9b5e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:54.855031+00:00 + duration_ms: 177.497 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-0.yaml new file mode 100644 index 000000000..7df1ea905 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-0.yaml @@ -0,0 +1,225 @@ +--- +uuid: 26ed3b8a-53b2-48cd-812e-2d87c9fe00be +task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-0 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? Please answer with "yes" or "no". + expect_response: + - 'yes' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? Please answer with "yes" or "no". + context: + id: 01KHJQXPCJFPE6VVE1K4ZF5DEX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:58.003071+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:58.005056+00:00 + - role: user + content: Is the bedroom light off? Please answer with "yes" or "no". + attachments: null + created: 2026-02-16 08:08:58.003141+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1adaae0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:58.005066+00:00 + duration_ms: 170.534 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-1.yaml new file mode 100644 index 000000000..1fae98f81 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-1.yaml @@ -0,0 +1,225 @@ +--- +uuid: 0ad29841-8022-45b0-bf1f-4bdbffec9d36 +task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-1 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? Please answer with "yes" or "no". + expect_response: + - 'yes' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? Please answer with "yes" or "no". + context: + id: 01KHJQXPQ9YXSM157H3J6W8BQ5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:58.345164+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:58.348033+00:00 + - role: user + content: Is the bedroom light off? Please answer with "yes" or "no". + attachments: null + created: 2026-02-16 08:08:58.345232+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1bf1dd0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:58.348043+00:00 + duration_ms: 28.233 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-2.yaml new file mode 100644 index 000000000..266c43312 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-2.yaml @@ -0,0 +1,225 @@ +--- +uuid: a30906c3-09da-4d02-bee9-201ca752da06 +task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-2 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? Please answer with "yes" or "no". + expect_response: + - 'yes' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? Please answer with "yes" or "no". + context: + id: 01KHJQXPY02M4T1XDEHQZQ5GX3 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:58.560475+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:58.562859+00:00 + - role: user + content: Is the bedroom light off? Please answer with "yes" or "no". + attachments: null + created: 2026-02-16 08:08:58.560545+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2bceda0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:58.562869+00:00 + duration_ms: 31.62 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-3.yaml new file mode 100644 index 000000000..02353b96d --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-3.yaml @@ -0,0 +1,225 @@ +--- +uuid: 1816d603-0940-4ab1-b63d-560b5c99c64b +task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-3 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? Please answer with "yes" or "no". + expect_response: + - 'yes' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? Please answer with "yes" or "no". + context: + id: 01KHJQXQ4PKP2CP0HKPVXEP6EE + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:58.774656+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:58.776734+00:00 + - role: user + content: Is the bedroom light off? Please answer with "yes" or "no". + attachments: null + created: 2026-02-16 08:08:58.774730+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c16acf60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:58.776744+00:00 + duration_ms: 26.734 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-4.yaml new file mode 100644 index 000000000..52132f5c4 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-4.yaml @@ -0,0 +1,225 @@ +--- +uuid: 2f7ce246-911b-436c-83f7-39a1ffb89a9a +task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-4 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? Please answer with "yes" or "no". + expect_response: + - 'yes' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? Please answer with "yes" or "no". + context: + id: 01KHJQXQBF3BF2BARB9CWGA3HB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:58.991197+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:58.993921+00:00 + - role: user + content: Is the bedroom light off? Please answer with "yes" or "no". + attachments: null + created: 2026-02-16 08:08:58.991296+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f45c01a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:58.993931+00:00 + duration_ms: 246.42 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-5.yaml new file mode 100644 index 000000000..16cdf67bc --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-5.yaml @@ -0,0 +1,225 @@ +--- +uuid: 7da68c50-096c-4c62-9765-2fdf5052ee15 +task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-5 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? Please answer with "yes" or "no". + expect_response: + - 'yes' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? Please answer with "yes" or "no". + context: + id: 01KHJQXQRMWY92Q9D9ZMB6SC8F + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:59.412983+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:59.420071+00:00 + - role: user + content: Is the bedroom light off? Please answer with "yes" or "no". + attachments: null + created: 2026-02-16 08:08:59.413054+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4946400>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:59.420085+00:00 + duration_ms: 179.723 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-6.yaml new file mode 100644 index 000000000..127bd2a06 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-6.yaml @@ -0,0 +1,225 @@ +--- +uuid: 770633f8-7f08-4e60-b5d4-ad1b0c500679 +task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-6 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? Please answer with "yes" or "no". + expect_response: + - 'yes' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? Please answer with "yes" or "no". + context: + id: 01KHJQXR3MCTT7B04DWFSRPP4V + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:59.765008+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:59.769662+00:00 + - role: user + content: Is the bedroom light off? Please answer with "yes" or "no". + attachments: null + created: 2026-02-16 08:08:59.765079+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1c5b690>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:59.769673+00:00 + duration_ms: 174.219 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-7.yaml new file mode 100644 index 000000000..96fd2c14d --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-7.yaml @@ -0,0 +1,225 @@ +--- +uuid: 1d8193b2-0ae7-4c08-a243-9ad8f5f14cb3 +task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-7 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? Please answer with "yes" or "no". + expect_response: + - 'yes' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? Please answer with "yes" or "no". + context: + id: 01KHJQXRF2E4140WEFPKQ70JB2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:00.130237+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:00.132794+00:00 + - role: user + content: Is the bedroom light off? Please answer with "yes" or "no". + attachments: null + created: 2026-02-16 08:09:00.130331+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1e9bcc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:00.132804+00:00 + duration_ms: 175.868 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-8.yaml new file mode 100644 index 000000000..bfb7e75af --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-8.yaml @@ -0,0 +1,225 @@ +--- +uuid: 2e6bb0cd-c124-495f-9a0b-18ced15aa29e +task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-8 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? Please answer with "yes" or "no". + expect_response: + - 'yes' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? Please answer with "yes" or "no". + context: + id: 01KHJQXRX8Y2RC9N3ZKCJ0N7RR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:00.584919+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:00.586521+00:00 + - role: user + content: Is the bedroom light off? Please answer with "yes" or "no". + attachments: null + created: 2026-02-16 08:09:00.584989+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2547270>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:00.586530+00:00 + duration_ms: 23.049 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-9.yaml new file mode 100644 index 000000000..a8ea7765a --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-9.yaml @@ -0,0 +1,225 @@ +--- +uuid: fd0f023b-9d17-4f73-9eee-280824037913 +task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-9 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light off? Please answer with "yes" or "no". + expect_response: + - 'yes' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light off? Please answer with "yes" or "no". + context: + id: 01KHJQXS33GZ7VS5B0RKSEXX02 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:00.771699+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:00.773173+00:00 + - role: user + content: Is the bedroom light off? Please answer with "yes" or "no". + attachments: null + created: 2026-02-16 08:09:00.771775+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3b44510>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:00.773184+00:00 + duration_ms: 164.513 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-0.yaml new file mode 100644 index 000000000..f6cf0297b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-0.yaml @@ -0,0 +1,225 @@ +--- +uuid: de2ce6e3-3f32-443e-83d9-55a41a782b51 +task_id: urban_loft_au_light-is_the_bedroom_light_on-0 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? + context: + id: 01KHJQXEXGFMN8T91NCWQ2KZY6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:50.352752+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:50.359097+00:00 + - role: user + content: Is the bedroom light on? + attachments: null + created: 2026-02-16 08:08:50.352823+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c17357a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:50.359107+00:00 + duration_ms: 33.514 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-1.yaml new file mode 100644 index 000000000..6c4f8b93b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-1.yaml @@ -0,0 +1,225 @@ +--- +uuid: 1e7902ca-e596-4107-82f5-adf467aa805f +task_id: urban_loft_au_light-is_the_bedroom_light_on-1 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? + context: + id: 01KHJQXF3RB7X6E6D3Y3B3GVDV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:50.552518+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:50.558986+00:00 + - role: user + content: Is the bedroom light on? + attachments: null + created: 2026-02-16 08:08:50.552590+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f489a2a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:50.558998+00:00 + duration_ms: 29.113 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-2.yaml new file mode 100644 index 000000000..cfea65610 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-2.yaml @@ -0,0 +1,225 @@ +--- +uuid: 1d8a63b6-c000-4ba8-965f-a65b9be4bd97 +task_id: urban_loft_au_light-is_the_bedroom_light_on-2 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? + context: + id: 01KHJQXFA1W6GMDMAXPGTFFG76 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:50.753941+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:50.758415+00:00 + - role: user + content: Is the bedroom light on? + attachments: null + created: 2026-02-16 08:08:50.754011+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4daa090>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:50.758427+00:00 + duration_ms: 27.216 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-3.yaml new file mode 100644 index 000000000..30f0eb623 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-3.yaml @@ -0,0 +1,225 @@ +--- +uuid: d86ef37f-b351-43e8-8fa8-91a701e23213 +task_id: urban_loft_au_light-is_the_bedroom_light_on-3 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? + context: + id: 01KHJQXFGD43CKZQTPGQ1MMBR6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:50.957799+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:50.999943+00:00 + - role: user + content: Is the bedroom light on? + attachments: null + created: 2026-02-16 08:08:50.957868+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c27b62a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:50.999963+00:00 + duration_ms: 71.346 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-4.yaml new file mode 100644 index 000000000..61ca2b264 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-4.yaml @@ -0,0 +1,225 @@ +--- +uuid: 12b8167e-0a74-45a2-b89e-72393af843ad +task_id: urban_loft_au_light-is_the_bedroom_light_on-4 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? + context: + id: 01KHJQXFRGDCZA5VRP36S206ZB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:51.216338+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:51.217905+00:00 + - role: user + content: Is the bedroom light on? + attachments: null + created: 2026-02-16 08:08:51.216418+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4dc1dd0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:51.217914+00:00 + duration_ms: 26.463 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-5.yaml new file mode 100644 index 000000000..74291065e --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-5.yaml @@ -0,0 +1,225 @@ +--- +uuid: 2a8c7ebc-cbda-42eb-a5d6-e0ee9b33e196 +task_id: urban_loft_au_light-is_the_bedroom_light_on-5 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? + context: + id: 01KHJQXFZ3X2YV782ARKGPFW92 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:51.427722+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:51.429227+00:00 + - role: user + content: Is the bedroom light on? + attachments: null + created: 2026-02-16 08:08:51.427796+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1f34f60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:51.429236+00:00 + duration_ms: 178.097 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-6.yaml new file mode 100644 index 000000000..07ba7bdb6 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-6.yaml @@ -0,0 +1,225 @@ +--- +uuid: 104cc0f5-329e-470e-aee8-b3b44fabdbe3 +task_id: urban_loft_au_light-is_the_bedroom_light_on-6 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? + context: + id: 01KHJQXGA9XRFJ986H35TKPZR4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:51.785231+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:51.786669+00:00 + - role: user + content: Is the bedroom light on? + attachments: null + created: 2026-02-16 08:08:51.785339+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c253e2a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:51.786679+00:00 + duration_ms: 33.048 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-7.yaml new file mode 100644 index 000000000..48492bbad --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-7.yaml @@ -0,0 +1,225 @@ +--- +uuid: 7f1d30f4-2a3e-4c75-8524-d62489bad745 +task_id: urban_loft_au_light-is_the_bedroom_light_on-7 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? + context: + id: 01KHJQXGH7KXHXF02707HHMX5T + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:52.008008+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:52.009877+00:00 + - role: user + content: Is the bedroom light on? + attachments: null + created: 2026-02-16 08:08:52.008077+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c281aa30>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:52.009886+00:00 + duration_ms: 26.228 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-8.yaml new file mode 100644 index 000000000..32be315de --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-8.yaml @@ -0,0 +1,225 @@ +--- +uuid: 59c0a752-a386-4c60-8215-b4be5c104a7d +task_id: urban_loft_au_light-is_the_bedroom_light_on-8 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? + context: + id: 01KHJQXGQA0EX9RMWFZ3B99MKN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:52.202584+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:52.204490+00:00 + - role: user + content: Is the bedroom light on? + attachments: null + created: 2026-02-16 08:08:52.202653+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c17a0250>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:52.204499+00:00 + duration_ms: 171.892 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-9.yaml new file mode 100644 index 000000000..0eb0a7fb5 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on-9.yaml @@ -0,0 +1,225 @@ +--- +uuid: 7d00977c-219f-49d1-8893-841b155933cd +task_id: urban_loft_au_light-is_the_bedroom_light_on-9 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? + expect_response: + - 'yes' + - 'on' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? + context: + id: 01KHJQXH215PD61VB6BF5PQ23Z + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:52.545446+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:52.548651+00:00 + - role: user + content: Is the bedroom light on? + attachments: null + created: 2026-02-16 08:08:52.545518+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c0e11640>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:52.548661+00:00 + duration_ms: 25.78 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-0.yaml new file mode 100644 index 000000000..fcbb51415 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-0.yaml @@ -0,0 +1,225 @@ +--- +uuid: 1b75e74f-8f90-4cd8-8f85-2b9111cd1ec7 +task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-0 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? Please answer "yes" or "no". + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? Please answer "yes" or "no". + context: + id: 01KHJQXKNZY7ETQRWYEZSP8TY5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:55.231603+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:55.234173+00:00 + - role: user + content: Is the bedroom light on? Please answer "yes" or "no". + attachments: null + created: 2026-02-16 08:08:55.231676+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f63ae560>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:55.234184+00:00 + duration_ms: 31.113 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-1.yaml new file mode 100644 index 000000000..78e76a037 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-1.yaml @@ -0,0 +1,225 @@ +--- +uuid: e5e8ddb6-899f-4783-87de-fdbb828a2e94 +task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-1 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? Please answer "yes" or "no". + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? Please answer "yes" or "no". + context: + id: 01KHJQXKWHJ2WHBA562Q1R67R6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:55.441301+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:55.443759+00:00 + - role: user + content: Is the bedroom light on? Please answer "yes" or "no". + attachments: null + created: 2026-02-16 08:08:55.441383+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f45bb320>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:55.443768+00:00 + duration_ms: 23.711 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-2.yaml new file mode 100644 index 000000000..3ab40da23 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-2.yaml @@ -0,0 +1,225 @@ +--- +uuid: 3aa89635-84d1-477a-a300-9b24a0675c53 +task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-2 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? Please answer "yes" or "no". + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? Please answer "yes" or "no". + context: + id: 01KHJQXM2HAREMRZ6KWG4DEKF0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:55.633545+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:55.639947+00:00 + - role: user + content: Is the bedroom light on? Please answer "yes" or "no". + attachments: null + created: 2026-02-16 08:08:55.633616+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d4162cf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:55.639959+00:00 + duration_ms: 232.962 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-3.yaml new file mode 100644 index 000000000..48cb11613 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-3.yaml @@ -0,0 +1,225 @@ +--- +uuid: 9aeb8d7a-d361-4153-a8c7-54315ee73efc +task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-3 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? Please answer "yes" or "no". + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? Please answer "yes" or "no". + context: + id: 01KHJQXMF95TFT5MNJ41KKY14V + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:56.041606+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:56.047976+00:00 + - role: user + content: Is the bedroom light on? Please answer "yes" or "no". + attachments: null + created: 2026-02-16 08:08:56.041678+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f63d8880>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:56.047989+00:00 + duration_ms: 30.503 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-4.yaml new file mode 100644 index 000000000..45f95599d --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-4.yaml @@ -0,0 +1,225 @@ +--- +uuid: c195d4cb-9736-4d30-91db-765fd3f060cc +task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-4 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? Please answer "yes" or "no". + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? Please answer "yes" or "no". + context: + id: 01KHJQXMNPDS7HSMN4F327V6QT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:56.246401+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:56.251801+00:00 + - role: user + content: Is the bedroom light on? Please answer "yes" or "no". + attachments: null + created: 2026-02-16 08:08:56.246478+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2cb4040>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:56.251818+00:00 + duration_ms: 29.286 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-5.yaml new file mode 100644 index 000000000..87172cb66 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-5.yaml @@ -0,0 +1,225 @@ +--- +uuid: 0dd064ec-5b68-4f1c-87b9-91725fe6a57f +task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-5 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? Please answer "yes" or "no". + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? Please answer "yes" or "no". + context: + id: 01KHJQXMW10MZTCH6CJPS31G3H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:56.449605+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:56.452258+00:00 + - role: user + content: Is the bedroom light on? Please answer "yes" or "no". + attachments: null + created: 2026-02-16 08:08:56.449674+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f50eda60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:56.452269+00:00 + duration_ms: 29.446 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-6.yaml new file mode 100644 index 000000000..1efadb259 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-6.yaml @@ -0,0 +1,225 @@ +--- +uuid: 77ecd037-e5af-4136-8de9-7d628a6d6c0d +task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-6 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? Please answer "yes" or "no". + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? Please answer "yes" or "no". + context: + id: 01KHJQXN3RCPTT3G3ZGXE403N4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:56.696408+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:56.697990+00:00 + - role: user + content: Is the bedroom light on? Please answer "yes" or "no". + attachments: null + created: 2026-02-16 08:08:56.696482+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3f93d70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:56.697999+00:00 + duration_ms: 23.385 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-7.yaml new file mode 100644 index 000000000..914531c55 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-7.yaml @@ -0,0 +1,225 @@ +--- +uuid: d4efac52-dfc1-4a03-8ac0-46c34878276c +task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-7 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? Please answer "yes" or "no". + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? Please answer "yes" or "no". + context: + id: 01KHJQXNAG6K1BH7WCAWATFEXP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:56.912341+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:56.913977+00:00 + - role: user + content: Is the bedroom light on? Please answer "yes" or "no". + attachments: null + created: 2026-02-16 08:08:56.912426+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f50b9bc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:56.913987+00:00 + duration_ms: 23.192 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-8.yaml new file mode 100644 index 000000000..c4619b34f --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-8.yaml @@ -0,0 +1,225 @@ +--- +uuid: a7b6ecec-9611-4529-9e0f-3c26c8b4e241 +task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-8 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? Please answer "yes" or "no". + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? Please answer "yes" or "no". + context: + id: 01KHJQXNV96EMTQCPFMZXPGXVZ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:57.449286+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:57.450724+00:00 + - role: user + content: Is the bedroom light on? Please answer "yes" or "no". + attachments: null + created: 2026-02-16 08:08:57.449362+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3bcfa00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:57.450734+00:00 + duration_ms: 23.261 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-9.yaml new file mode 100644 index 000000000..8f52de7c2 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-9.yaml @@ -0,0 +1,225 @@ +--- +uuid: 89ee12fa-24d9-4fe8-b730-fd479d47c092 +task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-9 +model_id: gemma-3-27b-it +category: +- light +- question +task: + input_text: Is the bedroom light on? Please answer "yes" or "no". + expect_response: + - 'no' + - 'off' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the bedroom light on? Please answer "yes" or "no". + context: + id: 01KHJQXP1NJSVDP18BRPNECP3F + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:08:57.653613+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:08:57.656032+00:00 + - role: user + content: Is the bedroom light on? Please answer "yes" or "no". + attachments: null + created: 2026-02-16 08:08:57.653687+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4af3270>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:08:57.656042+00:00 + duration_ms: 174.026 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-0.yaml new file mode 100644 index 000000000..d7b06756b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-0.yaml @@ -0,0 +1,224 @@ +--- +uuid: d84b5c40-83dd-46ba-9200-86ab98515427 +task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-0 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the living room media player playing, paused, or stopped? + expect_response: + - paused +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the living room media player playing, paused, or stopped? + context: + id: 01KHJQXWPK4HXEZR3NK90XW1G2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:04.467485+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:04.469064+00:00 + - role: user + content: Is the living room media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:04.467556+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3f52980>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:04.469074+00:00 + duration_ms: 28.777 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-1.yaml new file mode 100644 index 000000000..93f4ca627 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-1.yaml @@ -0,0 +1,224 @@ +--- +uuid: 1d41d77e-3a40-4726-917a-e17fe85d8cf1 +task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-1 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the living room media player playing, paused, or stopped? + expect_response: + - paused +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the living room media player playing, paused, or stopped? + context: + id: 01KHJQXWW8SAQ2GJNQ40SPB1RQ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:04.649078+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:04.657213+00:00 + - role: user + content: Is the living room media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:04.649147+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1ec9a60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:04.657226+00:00 + duration_ms: 182.938 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-2.yaml new file mode 100644 index 000000000..190189e7f --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-2.yaml @@ -0,0 +1,224 @@ +--- +uuid: cdc5d725-8250-4fdb-86e4-f57fdb09dc41 +task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-2 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the living room media player playing, paused, or stopped? + expect_response: + - paused +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the living room media player playing, paused, or stopped? + context: + id: 01KHJQXX79CAK3BRX7THFXWMJN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:05.001586+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:05.003617+00:00 + - role: user + content: Is the living room media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:05.001655+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c37837f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:05.003627+00:00 + duration_ms: 26.925 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-3.yaml new file mode 100644 index 000000000..ae2ee4075 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-3.yaml @@ -0,0 +1,224 @@ +--- +uuid: ed4d2133-c2d5-40cb-884a-f30325e79aea +task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-3 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the living room media player playing, paused, or stopped? + expect_response: + - paused +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the living room media player playing, paused, or stopped? + context: + id: 01KHJQXXR6A7ZY26W2CPANNG96 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:05.542799+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:05.544347+00:00 + - role: user + content: Is the living room media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:05.542868+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4e205c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:05.544356+00:00 + duration_ms: 23.955 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-4.yaml new file mode 100644 index 000000000..c445f34bd --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-4.yaml @@ -0,0 +1,224 @@ +--- +uuid: 795a55f7-efe2-4d46-a94d-6009030c6861 +task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-4 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the living room media player playing, paused, or stopped? + expect_response: + - paused +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the living room media player playing, paused, or stopped? + context: + id: 01KHJQXXYMS5SGD0TVKRC8P6QE + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:05.748941+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:05.750914+00:00 + - role: user + content: Is the living room media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:05.749013+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2727060>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:05.750923+00:00 + duration_ms: 23.715 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-5.yaml new file mode 100644 index 000000000..cbe921fc5 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-5.yaml @@ -0,0 +1,224 @@ +--- +uuid: ccc4af4a-83b2-4c6a-bb6c-6dda7d554b06 +task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-5 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the living room media player playing, paused, or stopped? + expect_response: + - paused +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the living room media player playing, paused, or stopped? + context: + id: 01KHJQXY50WY6EWAEZS4KM7R79 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:05.952356+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:05.953786+00:00 + - role: user + content: Is the living room media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:05.952425+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f483b8a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:05.953796+00:00 + duration_ms: 169.28 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-6.yaml new file mode 100644 index 000000000..300c3c359 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-6.yaml @@ -0,0 +1,224 @@ +--- +uuid: ecf4d974-c53c-4b56-80fd-54a1a2321c38 +task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-6 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the living room media player playing, paused, or stopped? + expect_response: + - paused +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the living room media player playing, paused, or stopped? + context: + id: 01KHJQXYH48C33MR9A4CTMZFJ1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:06.340723+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:06.342162+00:00 + - role: user + content: Is the living room media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:06.340792+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1ad5170>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:06.342172+00:00 + duration_ms: 28.13 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-7.yaml new file mode 100644 index 000000000..1941e9693 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-7.yaml @@ -0,0 +1,224 @@ +--- +uuid: 75e9676c-c7c1-4a37-ade4-68e55493fd32 +task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-7 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the living room media player playing, paused, or stopped? + expect_response: + - paused +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the living room media player playing, paused, or stopped? + context: + id: 01KHJQXYQHVGG9R6E0YBRYYF69 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:06.545108+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:06.546694+00:00 + - role: user + content: Is the living room media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:06.545178+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f49cfd70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:06.546703+00:00 + duration_ms: 175.672 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-8.yaml new file mode 100644 index 000000000..a785dfe78 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-8.yaml @@ -0,0 +1,224 @@ +--- +uuid: b9b7675b-c046-4bf3-8338-8c73764dcfab +task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-8 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the living room media player playing, paused, or stopped? + expect_response: + - paused +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the living room media player playing, paused, or stopped? + context: + id: 01KHJQXZ33KT65T3MX38P702DR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:06.915537+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:06.917570+00:00 + - role: user + content: Is the living room media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:06.915605+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1be7060>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:06.917580+00:00 + duration_ms: 29.536 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-9.yaml new file mode 100644 index 000000000..ecfc8be44 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-9.yaml @@ -0,0 +1,224 @@ +--- +uuid: b45b97e0-dd53-4e22-bfc9-f040edae5abf +task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-9 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the living room media player playing, paused, or stopped? + expect_response: + - paused +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the living room media player playing, paused, or stopped? + context: + id: 01KHJQXZ9XT5Q7BTGB2TQNTPMR + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:07.133391+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:07.135368+00:00 + - role: user + content: Is the living room media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:07.133460+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f45c2980>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:07.135378+00:00 + duration_ms: 24.281 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-0.yaml new file mode 100644 index 000000000..37cb80a3b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-0.yaml @@ -0,0 +1,224 @@ +--- +uuid: b945bf11-9ef9-44e4-bd4a-ae3ad337be19 +task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-0 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the media player playing, paused, or stopped? + expect_response: + - playing +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the media player playing, paused, or stopped? + context: + id: 01KHJQXSE8T165SYS5RJMB7B4H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:01.129049+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:01.130611+00:00 + - role: user + content: Is the media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:01.129117+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c26e2b90>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:01.130620+00:00 + duration_ms: 28.622 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-1.yaml new file mode 100644 index 000000000..ccc9cae6b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-1.yaml @@ -0,0 +1,224 @@ +--- +uuid: a8f4c495-5afd-4201-bcb8-ffa64b720fae +task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-1 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the media player playing, paused, or stopped? + expect_response: + - playing +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the media player playing, paused, or stopped? + context: + id: 01KHJQXSNJ087G5RHCXP22FNWW + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:01.362868+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:01.364426+00:00 + - role: user + content: Is the media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:01.362939+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f48caae0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:01.364437+00:00 + duration_ms: 31.588 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-2.yaml new file mode 100644 index 000000000..03c9fae94 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-2.yaml @@ -0,0 +1,224 @@ +--- +uuid: ce2c7746-1856-4ec2-b6d1-6f1dfec0e18d +task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-2 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the media player playing, paused, or stopped? + expect_response: + - playing +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the media player playing, paused, or stopped? + context: + id: 01KHJQXSW73AJ1PYZ52T4TEQPM + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:01.575430+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:01.577302+00:00 + - role: user + content: Is the media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:01.575499+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c0f02c40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:01.577312+00:00 + duration_ms: 172.185 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-3.yaml new file mode 100644 index 000000000..0025ee1db --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-3.yaml @@ -0,0 +1,224 @@ +--- +uuid: 2a8b4dce-8a4a-439c-851e-8de87b5fe3b2 +task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-3 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the media player playing, paused, or stopped? + expect_response: + - playing +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the media player playing, paused, or stopped? + context: + id: 01KHJQXTMQMBGGWBZSJ7Y4PRDC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:02.359743+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:02.361560+00:00 + - role: user + content: Is the media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:02.359813+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d43826c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:02.361570+00:00 + duration_ms: 22.653 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-4.yaml new file mode 100644 index 000000000..ce259ab32 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-4.yaml @@ -0,0 +1,224 @@ +--- +uuid: 99d6615f-b6e9-4c1b-b67d-84260ce3140e +task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-4 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the media player playing, paused, or stopped? + expect_response: + - playing +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the media player playing, paused, or stopped? + context: + id: 01KHJQXTV59X6G7HK93FMZZG90 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:02.565521+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:02.567447+00:00 + - role: user + content: Is the media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:02.565590+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f47c00f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:02.567458+00:00 + duration_ms: 176.856 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-5.yaml new file mode 100644 index 000000000..ab97d2bf3 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-5.yaml @@ -0,0 +1,224 @@ +--- +uuid: 716d863c-9b94-4c3b-8dcd-2c170f189000 +task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-5 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the media player playing, paused, or stopped? + expect_response: + - playing +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the media player playing, paused, or stopped? + context: + id: 01KHJQXV6P71J5WZ18HG86YY9E + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:02.934341+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:02.936358+00:00 + - role: user + content: Is the media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:02.934422+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1f22090>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:02.936367+00:00 + duration_ms: 24.988 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-6.yaml new file mode 100644 index 000000000..8f029e489 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-6.yaml @@ -0,0 +1,224 @@ +--- +uuid: b8e87384-b7cc-4054-bb72-6f410519e0bc +task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-6 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the media player playing, paused, or stopped? + expect_response: + - playing +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the media player playing, paused, or stopped? + context: + id: 01KHJQXVDWTWGHV38QKM5E2MMJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:03.165021+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:03.167007+00:00 + - role: user + content: Is the media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:03.165091+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1ed7d70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:03.167016+00:00 + duration_ms: 175.395 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-7.yaml new file mode 100644 index 000000000..92ca47eaa --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-7.yaml @@ -0,0 +1,224 @@ +--- +uuid: 5e7aa288-b81d-4bbf-bf60-ce58d0b598e3 +task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-7 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the media player playing, paused, or stopped? + expect_response: + - playing +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the media player playing, paused, or stopped? + context: + id: 01KHJQXVS0MVVW9J1EDW5QGM9K + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:03.520602+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:03.522652+00:00 + - role: user + content: Is the media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:03.520677+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4da9b10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:03.522662+00:00 + duration_ms: 174.006 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-8.yaml new file mode 100644 index 000000000..38f6629fd --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-8.yaml @@ -0,0 +1,224 @@ +--- +uuid: 550c4b4d-dbce-41ad-890d-23cf56e41866 +task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-8 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the media player playing, paused, or stopped? + expect_response: + - playing +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the media player playing, paused, or stopped? + context: + id: 01KHJQXW3SMXW8TRS680NHHV8T + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:03.865363+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:03.866789+00:00 + - role: user + content: Is the media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:03.865433+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2676140>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:03.866799+00:00 + duration_ms: 25.959 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-9.yaml new file mode 100644 index 000000000..741b3445f --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-9.yaml @@ -0,0 +1,224 @@ +--- +uuid: 0332eaff-143b-492a-bfa3-03f8ea7de658 +task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-9 +model_id: gemma-3-27b-it +category: +- media-player +- question +task: + input_text: Is the media player playing, paused, or stopped? + expect_response: + - playing +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is the media player playing, paused, or stopped? + context: + id: 01KHJQXW9WFVXXEC1DW64B4FPG + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:04.060334+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:04.063168+00:00 + - role: user + content: Is the media player playing, paused, or stopped? + attachments: null + created: 2026-02-16 08:09:04.060405+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3c8db10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:04.063179+00:00 + duration_ms: 169.168 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-0.yaml new file mode 100644 index 000000000..ce4cfa10d --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-0.yaml @@ -0,0 +1,225 @@ +--- +uuid: 17e8b471-a141-4fbc-aee7-d023a413d71b +task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-0 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the battery level of the motion sensor? + expect_response: + - 55% + - 55 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the battery level of the motion sensor? + context: + id: 01KHJQXZGWYHES3RK1PAV6GC45 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:07.356140+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:07.357694+00:00 + - role: user + content: What is the battery level of the motion sensor? + attachments: null + created: 2026-02-16 08:09:07.356209+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c16a0720>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:07.357704+00:00 + duration_ms: 31.586 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-1.yaml new file mode 100644 index 000000000..273253501 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-1.yaml @@ -0,0 +1,225 @@ +--- +uuid: 2900880f-1be2-40f5-84f3-025460cd3caf +task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-1 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the battery level of the motion sensor? + expect_response: + - 55% + - 55 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the battery level of the motion sensor? + context: + id: 01KHJQXZQX2EEYEPP0EF8KF685 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:07.581385+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:07.584128+00:00 + - role: user + content: What is the battery level of the motion sensor? + attachments: null + created: 2026-02-16 08:09:07.581455+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1e9a560>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:07.584140+00:00 + duration_ms: 184.418 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-2.yaml new file mode 100644 index 000000000..6c41d1736 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-2.yaml @@ -0,0 +1,225 @@ +--- +uuid: 23385195-fe82-4892-b9ae-512349c8625e +task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-2 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the battery level of the motion sensor? + expect_response: + - 55% + - 55 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the battery level of the motion sensor? + context: + id: 01KHJQY05WMAKFKJJZE9BEJR3Z + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:08.028133+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:08.029711+00:00 + - role: user + content: What is the battery level of the motion sensor? + attachments: null + created: 2026-02-16 08:09:08.028202+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1f36610>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:08.029720+00:00 + duration_ms: 26.991 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-3.yaml new file mode 100644 index 000000000..42dd92eb2 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-3.yaml @@ -0,0 +1,225 @@ +--- +uuid: 83c16cf9-5b19-478d-981f-0e82ef4727a8 +task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-3 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the battery level of the motion sensor? + expect_response: + - 55% + - 55 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the battery level of the motion sensor? + context: + id: 01KHJQY0BH13JQGR7SC14CWDGA + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:08.209993+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:08.211595+00:00 + - role: user + content: What is the battery level of the motion sensor? + attachments: null + created: 2026-02-16 08:09:08.210063+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d43b1220>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:08.211605+00:00 + duration_ms: 22.196 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-4.yaml new file mode 100644 index 000000000..01e59dcad --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-4.yaml @@ -0,0 +1,225 @@ +--- +uuid: 748dbf5b-6148-4308-aeec-98f480f29102 +task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-4 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the battery level of the motion sensor? + expect_response: + - 55% + - 55 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the battery level of the motion sensor? + context: + id: 01KHJQY0HK71KG39A0EAWM937S + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:08.403208+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:08.404657+00:00 + - role: user + content: What is the battery level of the motion sensor? + attachments: null + created: 2026-02-16 08:09:08.403302+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c18500f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:08.404667+00:00 + duration_ms: 24.88 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-5.yaml new file mode 100644 index 000000000..7d04e57c1 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-5.yaml @@ -0,0 +1,225 @@ +--- +uuid: 09c12ef9-0b19-40aa-93ec-e9c9bdf9da6f +task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-5 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the battery level of the motion sensor? + expect_response: + - 55% + - 55 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the battery level of the motion sensor? + context: + id: 01KHJQY0R4490J5ZJ3RSJ8KPTJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:08.613059+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:08.614571+00:00 + - role: user + content: What is the battery level of the motion sensor? + attachments: null + created: 2026-02-16 08:09:08.613132+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4d36820>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:08.614580+00:00 + duration_ms: 179.271 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-6.yaml new file mode 100644 index 000000000..bcc8671eb --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-6.yaml @@ -0,0 +1,225 @@ +--- +uuid: a61cdaea-945a-4328-aeed-c191cf00c8a1 +task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-6 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the battery level of the motion sensor? + expect_response: + - 55% + - 55 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the battery level of the motion sensor? + context: + id: 01KHJQY1365PE7F18Y4T8WPCTX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:08.966100+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:08.968188+00:00 + - role: user + content: What is the battery level of the motion sensor? + attachments: null + created: 2026-02-16 08:09:08.966170+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2dc24b0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:08.968199+00:00 + duration_ms: 171.094 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-7.yaml new file mode 100644 index 000000000..c8dd8c884 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-7.yaml @@ -0,0 +1,225 @@ +--- +uuid: fbde6a11-1a1c-42e8-9c63-43e445c34b86 +task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-7 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the battery level of the motion sensor? + expect_response: + - 55% + - 55 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the battery level of the motion sensor? + context: + id: 01KHJQY1E87M4R2B3R4VTQWWSJ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:09.320603+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:09.322502+00:00 + - role: user + content: What is the battery level of the motion sensor? + attachments: null + created: 2026-02-16 08:09:09.320673+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c26928d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:09.322513+00:00 + duration_ms: 28.92 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-8.yaml new file mode 100644 index 000000000..4afcb6ec9 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-8.yaml @@ -0,0 +1,225 @@ +--- +uuid: bdd3726b-5c0b-42b1-acab-804fb4dd8ba1 +task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-8 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the battery level of the motion sensor? + expect_response: + - 55% + - 55 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the battery level of the motion sensor? + context: + id: 01KHJQY1MXNE3SBH9H7YSCQ4EV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:09.533628+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:09.535173+00:00 + - role: user + content: What is the battery level of the motion sensor? + attachments: null + created: 2026-02-16 08:09:09.533697+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3c5bab0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:09.535182+00:00 + duration_ms: 25.04 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-9.yaml new file mode 100644 index 000000000..7491ffee7 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-9.yaml @@ -0,0 +1,225 @@ +--- +uuid: 8b6a93d8-89e3-4f8f-be06-9d00da0393ff +task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-9 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the battery level of the motion sensor? + expect_response: + - 55% + - 55 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the battery level of the motion sensor? + context: + id: 01KHJQY1VK2RT5SFBGC8ARY24R + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:09.747106+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:09.748720+00:00 + - role: user + content: What is the battery level of the motion sensor? + attachments: null + created: 2026-02-16 08:09:09.747176+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3fa4f60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:09.748730+00:00 + duration_ms: 31.203 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-0.yaml new file mode 100644 index 000000000..91c96021c --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-0.yaml @@ -0,0 +1,225 @@ +--- +uuid: 77075f6c-0177-4143-b581-ab644f1e615b +task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-0 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the motion sensor battery level? + expect_response: + - 13% + - 13 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the motion sensor battery level? + context: + id: 01KHJQY21XAFNEC042KHEQAZQX + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:09.949293+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:09.951268+00:00 + - role: user + content: What is the motion sensor battery level? + attachments: null + created: 2026-02-16 08:09:09.949363+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c16c4040>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:09.951277+00:00 + duration_ms: 25.533 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-1.yaml new file mode 100644 index 000000000..f5dabfa29 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-1.yaml @@ -0,0 +1,225 @@ +--- +uuid: 864c0ff3-675b-4026-81e6-78183cda0848 +task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-1 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the motion sensor battery level? + expect_response: + - 13% + - 13 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the motion sensor battery level? + context: + id: 01KHJQY28SGFAHDAX16A3G5A49 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:10.170061+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:10.172047+00:00 + - role: user + content: What is the motion sensor battery level? + attachments: null + created: 2026-02-16 08:09:10.170131+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c18ad9b0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:10.172057+00:00 + duration_ms: 36.108 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-2.yaml new file mode 100644 index 000000000..8b5dc8197 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-2.yaml @@ -0,0 +1,225 @@ +--- +uuid: 2d13bc0b-bca0-4890-8437-6fcd3351e5be +task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-2 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the motion sensor battery level? + expect_response: + - 13% + - 13 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the motion sensor battery level? + context: + id: 01KHJQY2FG2YJNBV6ME6ER00P9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:10.384758+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:10.386267+00:00 + - role: user + content: What is the motion sensor battery level? + attachments: null + created: 2026-02-16 08:09:10.384830+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2676980>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:10.386277+00:00 + duration_ms: 28.151 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-3.yaml new file mode 100644 index 000000000..8ae915a5d --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-3.yaml @@ -0,0 +1,225 @@ +--- +uuid: fdd906d5-4b5e-4a41-98c9-0ed7c45f3e43 +task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-3 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the motion sensor battery level? + expect_response: + - 13% + - 13 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the motion sensor battery level? + context: + id: 01KHJQY2P0RS8BXTR47EDJC754 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:10.592682+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:10.595922+00:00 + - role: user + content: What is the motion sensor battery level? + attachments: null + created: 2026-02-16 08:09:10.592755+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1eb9850>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:10.595937+00:00 + duration_ms: 26.023 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-4.yaml new file mode 100644 index 000000000..56541cce4 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-4.yaml @@ -0,0 +1,225 @@ +--- +uuid: c8814895-aac2-4935-8d79-970facc0a806 +task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-4 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the motion sensor battery level? + expect_response: + - 13% + - 13 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the motion sensor battery level? + context: + id: 01KHJQY2Y3P3P222XKNRG3PKEF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:10.851913+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:10.853502+00:00 + - role: user + content: What is the motion sensor battery level? + attachments: null + created: 2026-02-16 08:09:10.851985+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4e66cf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:10.853511+00:00 + duration_ms: 31.77 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-5.yaml new file mode 100644 index 000000000..9c2fd2abc --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-5.yaml @@ -0,0 +1,225 @@ +--- +uuid: f4777e94-16b2-44b9-a344-ff315b6bfe00 +task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-5 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the motion sensor battery level? + expect_response: + - 13% + - 13 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the motion sensor battery level? + context: + id: 01KHJQY34CY5S23VN9NH2MEDNV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:11.052489+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:11.054012+00:00 + - role: user + content: What is the motion sensor battery level? + attachments: null + created: 2026-02-16 08:09:11.052559+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3f90b40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:11.054021+00:00 + duration_ms: 25.728 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-6.yaml new file mode 100644 index 000000000..597f0601b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-6.yaml @@ -0,0 +1,225 @@ +--- +uuid: cc46e92c-b5a0-4bcc-8973-95f949c4a833 +task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-6 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the motion sensor battery level? + expect_response: + - 13% + - 13 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the motion sensor battery level? + context: + id: 01KHJQY3BMFHVQ3XFF6MBC6BX2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:11.285045+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:11.286758+00:00 + - role: user + content: What is the motion sensor battery level? + attachments: null + created: 2026-02-16 08:09:11.285118+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c36e4510>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:11.286772+00:00 + duration_ms: 30.371 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-7.yaml new file mode 100644 index 000000000..0bca5b526 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-7.yaml @@ -0,0 +1,225 @@ +--- +uuid: fd8a8ec3-bb7f-4358-a230-acd051ce7bdc +task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-7 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the motion sensor battery level? + expect_response: + - 13% + - 13 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the motion sensor battery level? + context: + id: 01KHJQY3HXM0GSSK9NBW84DSRK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:11.485780+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:11.487997+00:00 + - role: user + content: What is the motion sensor battery level? + attachments: null + created: 2026-02-16 08:09:11.485852+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d436c3b0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:11.488007+00:00 + duration_ms: 23.693 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-8.yaml new file mode 100644 index 000000000..da09de3e5 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-8.yaml @@ -0,0 +1,225 @@ +--- +uuid: 9072b87d-21b7-4e6d-a663-cdeed20d5fde +task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-8 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the motion sensor battery level? + expect_response: + - 13% + - 13 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the motion sensor battery level? + context: + id: 01KHJQY3RH7YM3BYP97B7JFG4N + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:11.697127+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:11.701351+00:00 + - role: user + content: What is the motion sensor battery level? + attachments: null + created: 2026-02-16 08:09:11.697198+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c254b690>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:11.701363+00:00 + duration_ms: 27.009 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-9.yaml new file mode 100644 index 000000000..86d724d4a --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-9.yaml @@ -0,0 +1,225 @@ +--- +uuid: 9a37ffdd-1a2a-45de-8dfd-abb2254caa19 +task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-9 +model_id: gemma-3-27b-it +category: +- sensor +- question +task: + input_text: What is the motion sensor battery level? + expect_response: + - 13% + - 13 percent +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the motion sensor battery level? + context: + id: 01KHJQY3YVH7ZQA08WX4W255AS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:11.899428+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:11.907043+00:00 + - role: user + content: What is the motion sensor battery level? + attachments: null + created: 2026-02-16 08:09:11.899500+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2724930>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:11.907055+00:00 + duration_ms: 33.301 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-0.yaml new file mode 100644 index 000000000..beb2433de --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-0.yaml @@ -0,0 +1,224 @@ +--- +uuid: 99b16b3a-78ce-4e40-a0db-9b93aa1511fc +task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-0 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: How many items are on my task list? + expect_response: + - '2' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many items are on my task list? + context: + id: 01KHJQY45BB71ZS9FP8J1S35B2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:12.107413+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:12.110114+00:00 + - role: user + content: How many items are on my task list? + attachments: null + created: 2026-02-16 08:09:12.107486+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f509c460>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:12.110124+00:00 + duration_ms: 22.919 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-1.yaml new file mode 100644 index 000000000..e154f32bb --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-1.yaml @@ -0,0 +1,224 @@ +--- +uuid: fffb5552-280b-4102-9a7b-86bd89ed5815 +task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-1 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: How many items are on my task list? + expect_response: + - '2' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many items are on my task list? + context: + id: 01KHJQY4C0H2094633XYN7C707 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:12.320707+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:12.322130+00:00 + - role: user + content: How many items are on my task list? + attachments: null + created: 2026-02-16 08:09:12.320780+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f47f22a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:12.322140+00:00 + duration_ms: 24.092 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-2.yaml new file mode 100644 index 000000000..f9951a756 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-2.yaml @@ -0,0 +1,224 @@ +--- +uuid: 277399e2-7ad6-488b-85ac-861f3020e285 +task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-2 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: How many items are on my task list? + expect_response: + - '2' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many items are on my task list? + context: + id: 01KHJQY4J8G5ZSJ6KREPFBPYXF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:12.520834+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:12.522324+00:00 + - role: user + content: How many items are on my task list? + attachments: null + created: 2026-02-16 08:09:12.520905+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f49cd9b0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:12.522333+00:00 + duration_ms: 22.732 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-3.yaml new file mode 100644 index 000000000..5efe92550 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-3.yaml @@ -0,0 +1,224 @@ +--- +uuid: 6ebcdb96-f7aa-4dd1-984e-5d175e775737 +task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-3 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: How many items are on my task list? + expect_response: + - '2' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many items are on my task list? + context: + id: 01KHJQY4S54M7EBVNYH9P651R9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:12.741938+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:12.743784+00:00 + - role: user + content: How many items are on my task list? + attachments: null + created: 2026-02-16 08:09:12.742008+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f45807d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:12.743793+00:00 + duration_ms: 32.196 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-4.yaml new file mode 100644 index 000000000..c1f7e0bf6 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-4.yaml @@ -0,0 +1,224 @@ +--- +uuid: 8f7cb0ab-1320-4da9-8218-a95b40c15dfc +task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-4 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: How many items are on my task list? + expect_response: + - '2' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many items are on my task list? + context: + id: 01KHJQY4ZGN856SJFSKWVQVEE9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:12.945033+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:12.946589+00:00 + - role: user + content: How many items are on my task list? + attachments: null + created: 2026-02-16 08:09:12.945103+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4958670>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:12.946599+00:00 + duration_ms: 173.252 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-5.yaml new file mode 100644 index 000000000..667c1f070 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-5.yaml @@ -0,0 +1,224 @@ +--- +uuid: b3cf91d2-bdff-4085-8d51-3f6b0387c86a +task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-5 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: How many items are on my task list? + expect_response: + - '2' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many items are on my task list? + context: + id: 01KHJQY5B0X1QTE4S72MX8QV6C + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:13.312928+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:13.315776+00:00 + - role: user + content: How many items are on my task list? + attachments: null + created: 2026-02-16 08:09:13.312998+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c19d5900>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:13.315786+00:00 + duration_ms: 33.352 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-6.yaml new file mode 100644 index 000000000..e2b7e7d69 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-6.yaml @@ -0,0 +1,224 @@ +--- +uuid: ec3f7395-81c5-4254-9f72-1859b5f7f67c +task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-6 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: How many items are on my task list? + expect_response: + - '2' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many items are on my task list? + context: + id: 01KHJQY5KDY56KRYYXN3PZQN3C + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:13.581829+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:13.583405+00:00 + - role: user + content: How many items are on my task list? + attachments: null + created: 2026-02-16 08:09:13.581898+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3d58f60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:13.583414+00:00 + duration_ms: 30.807 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-7.yaml new file mode 100644 index 000000000..415da898a --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-7.yaml @@ -0,0 +1,224 @@ +--- +uuid: 79cf027f-e1d8-41f4-921a-30ad4acb4d10 +task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-7 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: How many items are on my task list? + expect_response: + - '2' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many items are on my task list? + context: + id: 01KHJQY5TM3Q8R1SYV83GGA4H2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:13.812624+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:13.814173+00:00 + - role: user + content: How many items are on my task list? + attachments: null + created: 2026-02-16 08:09:13.812695+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c284eae0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:13.814182+00:00 + duration_ms: 32.72 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-8.yaml new file mode 100644 index 000000000..17e53ce98 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-8.yaml @@ -0,0 +1,224 @@ +--- +uuid: 61516373-df12-42e5-b80b-6e99eb3d2f6a +task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-8 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: How many items are on my task list? + expect_response: + - '2' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many items are on my task list? + context: + id: 01KHJQY6DJK4C03CVMQPQJ8X8C + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:14.418465+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:14.421874+00:00 + - role: user + content: How many items are on my task list? + attachments: null + created: 2026-02-16 08:09:14.418535+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f63eaf00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:14.421883+00:00 + duration_ms: 25.018 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-9.yaml new file mode 100644 index 000000000..1251fd008 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-how_many_items_are_on_my_task_list-9.yaml @@ -0,0 +1,224 @@ +--- +uuid: 9dfe343c-d9eb-4654-a225-44d48dad724e +task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-9 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: How many items are on my task list? + expect_response: + - '2' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: How many items are on my task list? + context: + id: 01KHJQY6KP7CJG8S3X0XSMBGDF + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:14.614892+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:14.616714+00:00 + - role: user + content: How many items are on my task list? + attachments: null + created: 2026-02-16 08:09:14.614961+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3bcdb10>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:14.616724+00:00 + duration_ms: 25.569 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-0.yaml new file mode 100644 index 000000000..ce203dcf8 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-0.yaml @@ -0,0 +1,224 @@ +--- +uuid: 51ac1159-3eb1-4d16-b3c5-cd5cd8c2bf5f +task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-0 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What chores around the house do I need to complete? + expect_response: + - terrace light +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What chores around the house do I need to complete? + context: + id: 01KHJQYBT5WMHTB055JHCKXZGA + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:19.942027+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:19.943622+00:00 + - role: user + content: What chores around the house do I need to complete? + attachments: null + created: 2026-02-16 08:09:19.942100+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4455640>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:19.943631+00:00 + duration_ms: 167.953 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-1.yaml new file mode 100644 index 000000000..5202d4e77 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-1.yaml @@ -0,0 +1,224 @@ +--- +uuid: eeb8d6ef-b0b4-4091-a7ff-dd1fbb7e6621 +task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-1 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What chores around the house do I need to complete? + expect_response: + - terrace light +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What chores around the house do I need to complete? + context: + id: 01KHJQYC5376Z0WY0TRFC6G8AH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:20.291929+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:20.296631+00:00 + - role: user + content: What chores around the house do I need to complete? + attachments: null + created: 2026-02-16 08:09:20.292005+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4e34a90>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:20.296644+00:00 + duration_ms: 34.919 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-2.yaml new file mode 100644 index 000000000..e5b257668 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-2.yaml @@ -0,0 +1,224 @@ +--- +uuid: 6857a219-4811-49b5-a7ec-685f7481cfd5 +task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-2 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What chores around the house do I need to complete? + expect_response: + - terrace light +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What chores around the house do I need to complete? + context: + id: 01KHJQYCC9WX1GTCTW4ZEBVSXK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:20.521566+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:20.524531+00:00 + - role: user + content: What chores around the house do I need to complete? + attachments: null + created: 2026-02-16 08:09:20.521637+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3c8c0f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:20.524541+00:00 + duration_ms: 32.291 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-3.yaml new file mode 100644 index 000000000..3a3bed608 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-3.yaml @@ -0,0 +1,224 @@ +--- +uuid: b2d50ff4-8d4e-4346-a531-8cf34907a7ea +task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-3 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What chores around the house do I need to complete? + expect_response: + - terrace light +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What chores around the house do I need to complete? + context: + id: 01KHJQYCM0PRVM3V5PBWY19GEB + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:20.768786+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:20.770399+00:00 + - role: user + content: What chores around the house do I need to complete? + attachments: null + created: 2026-02-16 08:09:20.768867+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f63c6140>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:20.770409+00:00 + duration_ms: 23.414 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-4.yaml new file mode 100644 index 000000000..a508ac14a --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-4.yaml @@ -0,0 +1,224 @@ +--- +uuid: 068d3c15-4e39-4bc6-a4ce-ad1ce5874d7b +task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-4 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What chores around the house do I need to complete? + expect_response: + - terrace light +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What chores around the house do I need to complete? + context: + id: 01KHJQYCTG89N4FPRD9V842Q0C + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:20.977072+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:20.978612+00:00 + - role: user + content: What chores around the house do I need to complete? + attachments: null + created: 2026-02-16 08:09:20.977143+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3936560>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:20.978622+00:00 + duration_ms: 26.968 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-5.yaml new file mode 100644 index 000000000..dedd5fac8 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-5.yaml @@ -0,0 +1,224 @@ +--- +uuid: 5f179f1e-63c0-42ca-a20e-527bc96223bb +task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-5 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What chores around the house do I need to complete? + expect_response: + - terrace light +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What chores around the house do I need to complete? + context: + id: 01KHJQYD19VEA71XG399BYSN9G + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:21.193663+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:21.196894+00:00 + - role: user + content: What chores around the house do I need to complete? + attachments: null + created: 2026-02-16 08:09:21.193734+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c32c2ae0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:21.196904+00:00 + duration_ms: 177.387 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-6.yaml new file mode 100644 index 000000000..b32c60a7b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-6.yaml @@ -0,0 +1,224 @@ +--- +uuid: d2fb3d73-4512-4045-93d3-5a8f731b0b7b +task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-6 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What chores around the house do I need to complete? + expect_response: + - terrace light +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What chores around the house do I need to complete? + context: + id: 01KHJQYDC8PD21F6XJ8E6AD912 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:21.545053+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:21.549898+00:00 + - role: user + content: What chores around the house do I need to complete? + attachments: null + created: 2026-02-16 08:09:21.545125+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c34c7110>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:21.549911+00:00 + duration_ms: 29.644 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-7.yaml new file mode 100644 index 000000000..975175bf7 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-7.yaml @@ -0,0 +1,224 @@ +--- +uuid: 6646c74a-ce09-4531-9afb-4bfd97bffc2b +task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-7 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What chores around the house do I need to complete? + expect_response: + - terrace light +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What chores around the house do I need to complete? + context: + id: 01KHJQYDJVG3VRVK8CPX93E8GK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:21.756017+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:21.757577+00:00 + - role: user + content: What chores around the house do I need to complete? + attachments: null + created: 2026-02-16 08:09:21.756088+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1cd5010>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:21.757587+00:00 + duration_ms: 37.835 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-8.yaml new file mode 100644 index 000000000..8ff3f025c --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-8.yaml @@ -0,0 +1,224 @@ +--- +uuid: 630e3f57-afa6-493c-ae89-9c78e9bb7dad +task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-8 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What chores around the house do I need to complete? + expect_response: + - terrace light +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What chores around the house do I need to complete? + context: + id: 01KHJQYDT96SV6AJAYPF3MEZ9A + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:21.993723+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:21.995758+00:00 + - role: user + content: What chores around the house do I need to complete? + attachments: null + created: 2026-02-16 08:09:21.993799+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2c754e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:21.995768+00:00 + duration_ms: 32.942 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-9.yaml new file mode 100644 index 000000000..f471e673f --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-9.yaml @@ -0,0 +1,224 @@ +--- +uuid: 5fd291f9-35cb-4888-81e7-ef95dfbc4e2c +task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-9 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What chores around the house do I need to complete? + expect_response: + - terrace light +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What chores around the house do I need to complete? + context: + id: 01KHJQYE15AXR3CE0YG4HMCZQS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:22.213859+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:22.215743+00:00 + - role: user + content: What chores around the house do I need to complete? + attachments: null + created: 2026-02-16 08:09:22.213929+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2cd5e80>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:22.215753+00:00 + duration_ms: 169.431 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-0.yaml new file mode 100644 index 000000000..95babfa4c --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-0.yaml @@ -0,0 +1,224 @@ +--- +uuid: 8614f32a-c606-451a-a673-4b9867872b47 +task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-0 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What's is on my task list to buy at the grocery store? + expect_response: + - salad +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What's is on my task list to buy at the grocery store? + context: + id: 01KHJQYECJD8352G4TKKP1NSJK + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:22.578669+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:22.580088+00:00 + - role: user + content: What's is on my task list to buy at the grocery store? + attachments: null + created: 2026-02-16 08:09:22.578738+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d41cc460>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:22.580097+00:00 + duration_ms: 26.352 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-1.yaml new file mode 100644 index 000000000..ca11f2c5e --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-1.yaml @@ -0,0 +1,224 @@ +--- +uuid: d2dce5ce-f34d-42a5-babb-5939e4743a7d +task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-1 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What's is on my task list to buy at the grocery store? + expect_response: + - salad +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What's is on my task list to buy at the grocery store? + context: + id: 01KHJQYF0C62MSAV3A7G3ZCV2M + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:23.212174+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:23.214055+00:00 + - role: user + content: What's is on my task list to buy at the grocery store? + attachments: null + created: 2026-02-16 08:09:23.212270+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3f39850>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:23.214066+00:00 + duration_ms: 23.516 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-2.yaml new file mode 100644 index 000000000..c36d89cce --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-2.yaml @@ -0,0 +1,224 @@ +--- +uuid: 6167dd23-341a-4920-b1d2-499a14e57f4d +task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-2 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What's is on my task list to buy at the grocery store? + expect_response: + - salad +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What's is on my task list to buy at the grocery store? + context: + id: 01KHJQYF774P3460FD2B7MZSC6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:23.431508+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:23.435922+00:00 + - role: user + content: What's is on my task list to buy at the grocery store? + attachments: null + created: 2026-02-16 08:09:23.431581+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f48940f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:23.435933+00:00 + duration_ms: 172.683 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-3.yaml new file mode 100644 index 000000000..cf1e83339 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-3.yaml @@ -0,0 +1,224 @@ +--- +uuid: 19a8cc48-3b8a-4136-a443-eef026274801 +task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-3 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What's is on my task list to buy at the grocery store? + expect_response: + - salad +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What's is on my task list to buy at the grocery store? + context: + id: 01KHJQYFJ91YJAV7A2G9858CRS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:23.785503+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:23.787439+00:00 + - role: user + content: What's is on my task list to buy at the grocery store? + attachments: null + created: 2026-02-16 08:09:23.785575+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c272fb60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:23.787449+00:00 + duration_ms: 24.906 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-4.yaml new file mode 100644 index 000000000..2a09aef07 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-4.yaml @@ -0,0 +1,224 @@ +--- +uuid: 42438274-3ee5-445c-99eb-8c7b6a2ddb17 +task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-4 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What's is on my task list to buy at the grocery store? + expect_response: + - salad +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What's is on my task list to buy at the grocery store? + context: + id: 01KHJQYFRPYZSDPN5QV4K1FZ1H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:23.990591+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:23.992727+00:00 + - role: user + content: What's is on my task list to buy at the grocery store? + attachments: null + created: 2026-02-16 08:09:23.990663+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c39283b0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:23.992737+00:00 + duration_ms: 24.778 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-5.yaml new file mode 100644 index 000000000..f7cb519cd --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-5.yaml @@ -0,0 +1,224 @@ +--- +uuid: eb4e98aa-ca1e-421b-9b0f-98f1dfaf1a03 +task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-5 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What's is on my task list to buy at the grocery store? + expect_response: + - salad +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What's is on my task list to buy at the grocery store? + context: + id: 01KHJQYFZ8MH1FHS3E3XZQ88KY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:24.200913+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:24.204223+00:00 + - role: user + content: What's is on my task list to buy at the grocery store? + attachments: null + created: 2026-02-16 08:09:24.200986+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f508a2a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:24.204234+00:00 + duration_ms: 33.391 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-6.yaml new file mode 100644 index 000000000..9991579a9 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-6.yaml @@ -0,0 +1,224 @@ +--- +uuid: 7dbdd954-31d3-4f80-8dd3-0f72fce5094f +task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-6 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What's is on my task list to buy at the grocery store? + expect_response: + - salad +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What's is on my task list to buy at the grocery store? + context: + id: 01KHJQYG7P69F5YE395F7FD476 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:24.470933+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:24.472555+00:00 + - role: user + content: What's is on my task list to buy at the grocery store? + attachments: null + created: 2026-02-16 08:09:24.471006+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3f528d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:24.472568+00:00 + duration_ms: 28.121 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-7.yaml new file mode 100644 index 000000000..eea021888 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-7.yaml @@ -0,0 +1,224 @@ +--- +uuid: 0fad4d91-a84d-41a9-89f9-61a292592748 +task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-7 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What's is on my task list to buy at the grocery store? + expect_response: + - salad +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What's is on my task list to buy at the grocery store? + context: + id: 01KHJQYGEEAGGB2DDEK5DW9AYP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:24.686417+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:24.688021+00:00 + - role: user + content: What's is on my task list to buy at the grocery store? + attachments: null + created: 2026-02-16 08:09:24.686490+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1ecab90>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:24.688031+00:00 + duration_ms: 31.075 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-8.yaml new file mode 100644 index 000000000..457c3a75c --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-8.yaml @@ -0,0 +1,224 @@ +--- +uuid: 29474adf-540e-457c-9ebe-b6c7aee798ce +task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-8 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What's is on my task list to buy at the grocery store? + expect_response: + - salad +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What's is on my task list to buy at the grocery store? + context: + id: 01KHJQYGQG8BM0P8QSEHAF65H5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:24.976192+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:24.977675+00:00 + - role: user + content: What's is on my task list to buy at the grocery store? + attachments: null + created: 2026-02-16 08:09:24.976286+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2675900>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:24.977684+00:00 + duration_ms: 33.696 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-9.yaml new file mode 100644 index 000000000..fcfd887dd --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-9.yaml @@ -0,0 +1,224 @@ +--- +uuid: 1e530d22-f7e4-436c-961e-e730307ce078 +task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-9 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: What's is on my task list to buy at the grocery store? + expect_response: + - salad +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What's is on my task list to buy at the grocery store? + context: + id: 01KHJQYGY7G5C0ASX6HDZZ6M5T + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:25.191337+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:25.195144+00:00 + - role: user + content: What's is on my task list to buy at the grocery store? + attachments: null + created: 2026-02-16 08:09:25.191414+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1a87d70>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:25.195153+00:00 + duration_ms: 29.762 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-0.yaml new file mode 100644 index 000000000..8b69f9e38 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-0.yaml @@ -0,0 +1,224 @@ +--- +uuid: ab53a616-0733-4102-8cd2-7e9e513e5a8f +task_id: urban_loft_au_todo-who_do_i_need_to_call-0 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who do i need to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who do i need to call? + context: + id: 01KHJQY9QBAA125QY2P16GP3V0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:17.803597+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:17.805124+00:00 + - role: user + content: Who do i need to call? + attachments: null + created: 2026-02-16 08:09:17.803666+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f44d8eb0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:17.805134+00:00 + duration_ms: 27.16 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-1.yaml new file mode 100644 index 000000000..c123d7717 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-1.yaml @@ -0,0 +1,224 @@ +--- +uuid: a97e483e-48c7-483a-abc5-48e83359a2d9 +task_id: urban_loft_au_todo-who_do_i_need_to_call-1 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who do i need to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who do i need to call? + context: + id: 01KHJQY9XP3XCFARC73YJ1XHYM + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:18.006699+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:18.008267+00:00 + - role: user + content: Who do i need to call? + attachments: null + created: 2026-02-16 08:09:18.006771+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c34f16f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:18.008277+00:00 + duration_ms: 27.762 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-2.yaml new file mode 100644 index 000000000..a81fa0ae6 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-2.yaml @@ -0,0 +1,224 @@ +--- +uuid: 2133ad7f-3700-4a72-9f4f-68f1e23cdce1 +task_id: urban_loft_au_todo-who_do_i_need_to_call-2 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who do i need to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who do i need to call? + context: + id: 01KHJQYA401VMQKFE167Q5B63Q + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:18.208308+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:18.209828+00:00 + - role: user + content: Who do i need to call? + attachments: null + created: 2026-02-16 08:09:18.208380+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3c9bb60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:18.209837+00:00 + duration_ms: 38.548 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-3.yaml new file mode 100644 index 000000000..dd7113463 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-3.yaml @@ -0,0 +1,224 @@ +--- +uuid: 0c48cc04-b137-4d44-a5ac-07385bc61245 +task_id: urban_loft_au_todo-who_do_i_need_to_call-3 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who do i need to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who do i need to call? + context: + id: 01KHJQYAADX8P159BTYYN2WKEH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:18.414007+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:18.415536+00:00 + - role: user + content: Who do i need to call? + attachments: null + created: 2026-02-16 08:09:18.414087+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4df4f60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:18.415545+00:00 + duration_ms: 24.78 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-4.yaml new file mode 100644 index 000000000..3ea2681b4 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-4.yaml @@ -0,0 +1,224 @@ +--- +uuid: 5b3285ea-d444-43e5-b057-141c9f4e75f3 +task_id: urban_loft_au_todo-who_do_i_need_to_call-4 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who do i need to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who do i need to call? + context: + id: 01KHJQYAH2TPBYKHBCP0GXEQM4 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:18.626228+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:18.627742+00:00 + - role: user + content: Who do i need to call? + attachments: null + created: 2026-02-16 08:09:18.626325+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2953740>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:18.627752+00:00 + duration_ms: 29.303 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-5.yaml new file mode 100644 index 000000000..53578e872 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-5.yaml @@ -0,0 +1,224 @@ +--- +uuid: 8115b7a8-1a6d-4af4-be44-aba42237d775 +task_id: urban_loft_au_todo-who_do_i_need_to_call-5 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who do i need to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who do i need to call? + context: + id: 01KHJQYAQYYXMRYY5VCGWSB40T + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:18.846739+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:18.848616+00:00 + - role: user + content: Who do i need to call? + attachments: null + created: 2026-02-16 08:09:18.846810+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c36db1c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:18.848625+00:00 + duration_ms: 28.053 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-6.yaml new file mode 100644 index 000000000..53bbf9af7 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-6.yaml @@ -0,0 +1,224 @@ +--- +uuid: afc40525-87ea-4b62-9b3c-85eee1646ef7 +task_id: urban_loft_au_todo-who_do_i_need_to_call-6 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who do i need to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who do i need to call? + context: + id: 01KHJQYAZ0FWYXJNGV59QB80Z5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:19.072936+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:19.074847+00:00 + - role: user + content: Who do i need to call? + attachments: null + created: 2026-02-16 08:09:19.073008+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4b9e820>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:19.074857+00:00 + duration_ms: 32.184 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-7.yaml new file mode 100644 index 000000000..89a1657a4 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-7.yaml @@ -0,0 +1,224 @@ +--- +uuid: b02a2985-fc70-4da2-acb6-dbc7cb69871f +task_id: urban_loft_au_todo-who_do_i_need_to_call-7 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who do i need to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who do i need to call? + context: + id: 01KHJQYB62QZD1BB592HA2QK0W + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:19.298986+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:19.301051+00:00 + - role: user + content: Who do i need to call? + attachments: null + created: 2026-02-16 08:09:19.299058+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1eb9a60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:19.301060+00:00 + duration_ms: 29.28 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-8.yaml new file mode 100644 index 000000000..3a90525a1 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-8.yaml @@ -0,0 +1,224 @@ +--- +uuid: 18f43dff-e6e9-4145-8a33-f47936d0c0e2 +task_id: urban_loft_au_todo-who_do_i_need_to_call-8 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who do i need to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who do i need to call? + context: + id: 01KHJQYBCDKEJRS25PVEB02SEY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:19.501358+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:19.504034+00:00 + - role: user + content: Who do i need to call? + attachments: null + created: 2026-02-16 08:09:19.501430+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1fcac40>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:19.504044+00:00 + duration_ms: 24.658 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-9.yaml new file mode 100644 index 000000000..5745fe4f7 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_do_i_need_to_call-9.yaml @@ -0,0 +1,224 @@ +--- +uuid: 814ea57d-7b74-4aaf-84b6-243f2c347c48 +task_id: urban_loft_au_todo-who_do_i_need_to_call-9 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who do i need to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who do i need to call? + context: + id: 01KHJQYBKME4JJFAJSVHFP2K4K + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:19.732322+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:19.733911+00:00 + - role: user + content: Who do i need to call? + attachments: null + created: 2026-02-16 08:09:19.732398+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4e40300>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:19.733921+00:00 + duration_ms: 31.647 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-0.yaml new file mode 100644 index 000000000..63bb77257 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-0.yaml @@ -0,0 +1,224 @@ +--- +uuid: e8be2be4-8760-4f32-b0a0-4fd484b75b8e +task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-0 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who is on my task list to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who is on my task list to call? + context: + id: 01KHJQY6T1Q2J1X2BTF2BRGGGY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:14.817642+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:14.819523+00:00 + - role: user + content: Who is on my task list to call? + attachments: null + created: 2026-02-16 08:09:14.817712+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2650300>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:14.819533+00:00 + duration_ms: 173.391 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-1.yaml new file mode 100644 index 000000000..80ae8b60e --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-1.yaml @@ -0,0 +1,224 @@ +--- +uuid: 3d65bfac-597e-46e1-b83f-e65408526562 +task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-1 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who is on my task list to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who is on my task list to call? + context: + id: 01KHJQY758JFPQ7WXTWFXN3HAQ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:15.176273+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:15.178296+00:00 + - role: user + content: Who is on my task list to call? + attachments: null + created: 2026-02-16 08:09:15.176356+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1f690c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:15.178306+00:00 + duration_ms: 23.553 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-2.yaml new file mode 100644 index 000000000..5e6966c0d --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-2.yaml @@ -0,0 +1,224 @@ +--- +uuid: def17cd3-3ad9-48f3-bc60-d8c9b9b33c38 +task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-2 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who is on my task list to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who is on my task list to call? + context: + id: 01KHJQY7BWDCP72Y7DJDBW700G + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:15.388610+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:15.390549+00:00 + - role: user + content: Who is on my task list to call? + attachments: null + created: 2026-02-16 08:09:15.388680+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c272fe20>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:15.390559+00:00 + duration_ms: 26.116 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-3.yaml new file mode 100644 index 000000000..81de90c4a --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-3.yaml @@ -0,0 +1,224 @@ +--- +uuid: 62baf2cb-db83-4559-b090-44f8c61d0b54 +task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-3 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who is on my task list to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who is on my task list to call? + context: + id: 01KHJQY7YE953G0JPT56YM40TC + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:15.982758+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:15.985712+00:00 + - role: user + content: Who is on my task list to call? + attachments: null + created: 2026-02-16 08:09:15.982827+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c199d0c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:15.985722+00:00 + duration_ms: 25.567 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-4.yaml new file mode 100644 index 000000000..bac55c7ea --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-4.yaml @@ -0,0 +1,224 @@ +--- +uuid: f9f7584e-e97d-4476-9759-b4ebc53e8e01 +task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-4 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who is on my task list to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who is on my task list to call? + context: + id: 01KHJQY84NBERYGATYMGYDJY3Q + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:16.181474+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:16.186033+00:00 + - role: user + content: Who is on my task list to call? + attachments: null + created: 2026-02-16 08:09:16.181543+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2ff21f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:16.186044+00:00 + duration_ms: 59.016 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-5.yaml new file mode 100644 index 000000000..b0ab39b96 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-5.yaml @@ -0,0 +1,224 @@ +--- +uuid: 9f389c02-0e15-466e-9667-ec1a47c311f5 +task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-5 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who is on my task list to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who is on my task list to call? + context: + id: 01KHJQY8BT27J5KA1X3RFW6VG1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:16.411020+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:16.416276+00:00 + - role: user + content: Who is on my task list to call? + attachments: null + created: 2026-02-16 08:09:16.411095+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1906770>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:16.416288+00:00 + duration_ms: 177.206 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-6.yaml new file mode 100644 index 000000000..7bea56c70 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-6.yaml @@ -0,0 +1,224 @@ +--- +uuid: 93dede84-3dcd-494b-91f7-5a194c842cbf +task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-6 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who is on my task list to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who is on my task list to call? + context: + id: 01KHJQY8PT7Q7XXARKEWVFG1CS + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:16.763027+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:16.766163+00:00 + - role: user + content: Who is on my task list to call? + attachments: null + created: 2026-02-16 08:09:16.763097+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4d2cbf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:16.766173+00:00 + duration_ms: 25.664 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-7.yaml new file mode 100644 index 000000000..1d9240201 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-7.yaml @@ -0,0 +1,224 @@ +--- +uuid: 9c566660-a51c-48ae-8f51-9321601501a6 +task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-7 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who is on my task list to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who is on my task list to call? + context: + id: 01KHJQY8WYGJ1F680V9J62VYD8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:16.958182+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:16.960792+00:00 + - role: user + content: Who is on my task list to call? + attachments: null + created: 2026-02-16 08:09:16.958280+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c18d7480>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:16.960802+00:00 + duration_ms: 24.114 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-8.yaml new file mode 100644 index 000000000..38cbfc734 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-8.yaml @@ -0,0 +1,224 @@ +--- +uuid: 96961d8c-3508-4bb5-a895-e3f817e89aee +task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-8 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who is on my task list to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who is on my task list to call? + context: + id: 01KHJQY94K7QY054590ZYCV482 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:17.203469+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:17.215758+00:00 + - role: user + content: Who is on my task list to call? + attachments: null + created: 2026-02-16 08:09:17.203542+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d4211bc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:17.215776+00:00 + duration_ms: 33.59 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-9.yaml new file mode 100644 index 000000000..6db9f7e28 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_todo-who_is_on_my_task_list_to_call-9.yaml @@ -0,0 +1,224 @@ +--- +uuid: 8e65ccc1-d3b0-4d7b-be2d-fec686a0c01c +task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-9 +model_id: gemma-3-27b-it +category: +- todo +- question +task: + input_text: Who is on my task list to call? + expect_response: + - Liza +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Who is on my task list to call? + context: + id: 01KHJQY9H3VD70JXJD7KZKG9N2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:17.603417+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:17.604975+00:00 + - role: user + content: Who is on my task list to call? + attachments: null + created: 2026-02-16 08:09:17.603486+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c28e37f0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:17.604985+00:00 + duration_ms: 27.847 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-0.yaml new file mode 100644 index 000000000..2a14b6e83 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-0.yaml @@ -0,0 +1,224 @@ +--- +uuid: 427a90ce-b324-4788-8eb5-96da9a52c521 +task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-0 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Is today sunny or cloudy? + expect_response: + - cloudy +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is today sunny or cloudy? + context: + id: 01KHJQYM97WH9G141AQ3NZ4K1B + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:28.616003+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:28.617546+00:00 + - role: user + content: Is today sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:28.616071+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4494880>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:28.617555+00:00 + duration_ms: 24.686 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-1.yaml new file mode 100644 index 000000000..5fe562488 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-1.yaml @@ -0,0 +1,224 @@ +--- +uuid: 97c56db1-3a56-484c-84ca-213fed0199af +task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-1 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Is today sunny or cloudy? + expect_response: + - cloudy +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is today sunny or cloudy? + context: + id: 01KHJQYMFCG13K3KVC1EK1J65H + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:28.812211+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:28.813682+00:00 + - role: user + content: Is today sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:28.812320+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2426560>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:28.813692+00:00 + duration_ms: 45.997 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-2.yaml new file mode 100644 index 000000000..6b9bcd508 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-2.yaml @@ -0,0 +1,224 @@ +--- +uuid: ce5488fe-4876-40b8-8281-eb2f69204397 +task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-2 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Is today sunny or cloudy? + expect_response: + - cloudy +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is today sunny or cloudy? + context: + id: 01KHJQYMNZR1SJQNFPNFS8M9G9 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:29.023874+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:29.025700+00:00 + - role: user + content: Is today sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:29.023941+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c248ef00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:29.025710+00:00 + duration_ms: 23.024 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-3.yaml new file mode 100644 index 000000000..2cb28c690 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-3.yaml @@ -0,0 +1,224 @@ +--- +uuid: 23ad4aea-ec58-4a75-bcca-5581ad8423a5 +task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-3 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Is today sunny or cloudy? + expect_response: + - cloudy +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is today sunny or cloudy? + context: + id: 01KHJQYMW662AASDFHYEKKR5V8 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:29.222552+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:29.224379+00:00 + - role: user + content: Is today sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:29.222621+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4dfe140>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:29.224388+00:00 + duration_ms: 23.425 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-4.yaml new file mode 100644 index 000000000..b4488ab59 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-4.yaml @@ -0,0 +1,224 @@ +--- +uuid: 4d5378c9-1fb3-47ca-a471-08d0f78a13eb +task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-4 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Is today sunny or cloudy? + expect_response: + - cloudy +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is today sunny or cloudy? + context: + id: 01KHJQYN2J99SC8R3K7JQKACEY + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:29.426579+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:29.431304+00:00 + - role: user + content: Is today sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:29.426650+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3807110>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:29.431315+00:00 + duration_ms: 28.262 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-5.yaml new file mode 100644 index 000000000..22393e7d8 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-5.yaml @@ -0,0 +1,224 @@ +--- +uuid: d8d9195c-607d-4b4d-a443-257ea8e7e117 +task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-5 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Is today sunny or cloudy? + expect_response: + - cloudy +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is today sunny or cloudy? + context: + id: 01KHJQYN8Y5R225ZQHT2M2DFV1 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:29.630811+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:29.632430+00:00 + - role: user + content: Is today sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:29.630883+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3a328d0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:29.632443+00:00 + duration_ms: 175.476 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-6.yaml new file mode 100644 index 000000000..3bafa0f90 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-6.yaml @@ -0,0 +1,224 @@ +--- +uuid: 8f5b677f-7f16-47ae-9f55-7ad0e5844667 +task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-6 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Is today sunny or cloudy? + expect_response: + - cloudy +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is today sunny or cloudy? + context: + id: 01KHJQYNMAKP8AMFXWHBXKKNM5 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:29.994782+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:30.001385+00:00 + - role: user + content: Is today sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:29.994852+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4d2d9b0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:30.001396+00:00 + duration_ms: 34.492 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-7.yaml new file mode 100644 index 000000000..2b991cc04 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-7.yaml @@ -0,0 +1,224 @@ +--- +uuid: 2f4193d5-4edc-47eb-bf9d-3af0627da511 +task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-7 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Is today sunny or cloudy? + expect_response: + - cloudy +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is today sunny or cloudy? + context: + id: 01KHJQYNV7CMKHMMHXA458ANE6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:30.215592+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:30.218221+00:00 + - role: user + content: Is today sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:30.215662+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2b44040>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:30.218231+00:00 + duration_ms: 173.356 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-8.yaml new file mode 100644 index 000000000..742e9deee --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-8.yaml @@ -0,0 +1,224 @@ +--- +uuid: 6fd0557a-be50-49dd-aa12-7594a2b3f1c0 +task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-8 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Is today sunny or cloudy? + expect_response: + - cloudy +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is today sunny or cloudy? + context: + id: 01KHJQYP5YBPV5EN3N716BD1X7 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:30.558275+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:30.561239+00:00 + - role: user + content: Is today sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:30.558344+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f45c09e0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:30.561264+00:00 + duration_ms: 25.403 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-9.yaml new file mode 100644 index 000000000..992b71135 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-is_today_sunny_or_cloudy-9.yaml @@ -0,0 +1,224 @@ +--- +uuid: 9219b0e9-02e5-4256-8e45-6ee1110b6caf +task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-9 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Is today sunny or cloudy? + expect_response: + - cloudy +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Is today sunny or cloudy? + context: + id: 01KHJQYPCT7WTT7XKCQN0WN149 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:30.778422+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:30.781396+00:00 + - role: user + content: Is today sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:30.778491+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f49cdbc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:30.781406+00:00 + duration_ms: 27.659 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-0.yaml new file mode 100644 index 000000000..70fe2c154 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-0.yaml @@ -0,0 +1,224 @@ +--- +uuid: 1aae0d81-5e7d-45b4-b9f6-d594c686b595 +task_id: urban_loft_au_weather-what_is_the_temperature_outside-0 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: What is the temperature outside? + expect_response: + - '30' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the temperature outside? + context: + id: 01KHJQYH52XGR3VXG3M316HQHN + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:25.411048+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:25.413166+00:00 + - role: user + content: What is the temperature outside? + attachments: null + created: 2026-02-16 08:09:25.411119+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c31ceb90>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:25.413176+00:00 + duration_ms: 24.08 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-1.yaml new file mode 100644 index 000000000..c5b1aee70 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-1.yaml @@ -0,0 +1,224 @@ +--- +uuid: ccad2a8a-f4ed-452c-8438-ee13b33a98d8 +task_id: urban_loft_au_weather-what_is_the_temperature_outside-1 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: What is the temperature outside? + expect_response: + - '30' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the temperature outside? + context: + id: 01KHJQYHB8CGC6ATJJCMKQ9PYH + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:25.609063+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:25.610657+00:00 + - role: user + content: What is the temperature outside? + attachments: null + created: 2026-02-16 08:09:25.609134+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1818880>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:25.610666+00:00 + duration_ms: 23.832 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-2.yaml new file mode 100644 index 000000000..5337a41bf --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-2.yaml @@ -0,0 +1,224 @@ +--- +uuid: f1b004fe-f820-44ce-b066-c908c5c18d81 +task_id: urban_loft_au_weather-what_is_the_temperature_outside-2 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: What is the temperature outside? + expect_response: + - '30' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the temperature outside? + context: + id: 01KHJQYHJT54ZERDMCJXHT6VXQ + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:25.851002+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:25.852468+00:00 + - role: user + content: What is the temperature outside? + attachments: null + created: 2026-02-16 08:09:25.851070+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c371f060>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:25.852477+00:00 + duration_ms: 29.862 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-3.yaml new file mode 100644 index 000000000..762fe417c --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-3.yaml @@ -0,0 +1,224 @@ +--- +uuid: d1a6b9fe-e4e4-4872-b7b1-f9220aaee703 +task_id: urban_loft_au_weather-what_is_the_temperature_outside-3 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: What is the temperature outside? + expect_response: + - '30' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the temperature outside? + context: + id: 01KHJQYHS2NFWRG8JV2652EPCT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:26.050791+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:26.055576+00:00 + - role: user + content: What is the temperature outside? + attachments: null + created: 2026-02-16 08:09:26.050868+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c251dbc0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:26.055588+00:00 + duration_ms: 173.984 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-4.yaml new file mode 100644 index 000000000..5c6074c6e --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-4.yaml @@ -0,0 +1,224 @@ +--- +uuid: 903a01d3-5794-4cc2-8978-41bbbc3c174a +task_id: urban_loft_au_weather-what_is_the_temperature_outside-4 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: What is the temperature outside? + expect_response: + - '30' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the temperature outside? + context: + id: 01KHJQYJFTRKH4XG1Z8M7G72V6 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:26.778882+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:26.783166+00:00 + - role: user + content: What is the temperature outside? + attachments: null + created: 2026-02-16 08:09:26.778954+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1a19590>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:26.783176+00:00 + duration_ms: 33.771 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-5.yaml new file mode 100644 index 000000000..194da6dd2 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-5.yaml @@ -0,0 +1,224 @@ +--- +uuid: 847a192e-4605-494b-a170-46c2b96c63c7 +task_id: urban_loft_au_weather-what_is_the_temperature_outside-5 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: What is the temperature outside? + expect_response: + - '30' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the temperature outside? + context: + id: 01KHJQYJPKVP1EKWBT4S0FXR14 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:26.995720+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:26.997164+00:00 + - role: user + content: What is the temperature outside? + attachments: null + created: 2026-02-16 08:09:26.995789+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c2fbfa00>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:26.997174+00:00 + duration_ms: 231.393 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-6.yaml new file mode 100644 index 000000000..0e3169358 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-6.yaml @@ -0,0 +1,224 @@ +--- +uuid: e6563d72-343a-4061-95ca-bf34fd3ccb3b +task_id: urban_loft_au_weather-what_is_the_temperature_outside-6 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: What is the temperature outside? + expect_response: + - '30' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the temperature outside? + context: + id: 01KHJQYK3AXK5V2JTKMGHCG2YT + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:27.402417+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:27.403958+00:00 + - role: user + content: What is the temperature outside? + attachments: null + created: 2026-02-16 08:09:27.402486+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f633a4b0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:27.403967+00:00 + duration_ms: 22.762 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-7.yaml new file mode 100644 index 000000000..6f366cb24 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-7.yaml @@ -0,0 +1,224 @@ +--- +uuid: f965e22c-e483-4794-b2e4-7c1d18c608d2 +task_id: urban_loft_au_weather-what_is_the_temperature_outside-7 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: What is the temperature outside? + expect_response: + - '30' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the temperature outside? + context: + id: 01KHJQYK9VK24FQ4JNHTXAM2WD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:27.611495+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:27.613031+00:00 + - role: user + content: What is the temperature outside? + attachments: null + created: 2026-02-16 08:09:27.611564+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4e37740>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:27.613040+00:00 + duration_ms: 31.104 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-8.yaml new file mode 100644 index 000000000..d98be897f --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-8.yaml @@ -0,0 +1,224 @@ +--- +uuid: ce916c59-004b-4693-8ae3-057546f2417b +task_id: urban_loft_au_weather-what_is_the_temperature_outside-8 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: What is the temperature outside? + expect_response: + - '30' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the temperature outside? + context: + id: 01KHJQYKH073B505J2BWP43ZE0 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:27.841013+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:27.843808+00:00 + - role: user + content: What is the temperature outside? + attachments: null + created: 2026-02-16 08:09:27.841082+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f44b38a0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:27.843818+00:00 + duration_ms: 251.828 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-9.yaml new file mode 100644 index 000000000..d1b70c432 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-what_is_the_temperature_outside-9.yaml @@ -0,0 +1,224 @@ +--- +uuid: 5ea6f6b3-b537-4633-96ec-455cb9eeae96 +task_id: urban_loft_au_weather-what_is_the_temperature_outside-9 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: What is the temperature outside? + expect_response: + - '30' +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: What is the temperature outside? + context: + id: 01KHJQYM0TWV3AD9X7J9565E4R + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:28.346842+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:28.348460+00:00 + - role: user + content: What is the temperature outside? + attachments: null + created: 2026-02-16 08:09:28.346912+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1897ed0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:28.348470+00:00 + duration_ms: 80.763 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-0.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-0.yaml new file mode 100644 index 000000000..cf179bd31 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-0.yaml @@ -0,0 +1,224 @@ +--- +uuid: 3804239b-80ed-4873-b22b-2d5c7095a472 +task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-0 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Will the weather today be sunny or cloudy? + expect_response: + - sunny +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Will the weather today be sunny or cloudy? + context: + id: 01KHJQYPKN5HSC9RGTJQQFV2AD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:30.997421+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:31.000465+00:00 + - role: user + content: Will the weather today be sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:30.997491+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1b2ecf0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:31.000476+00:00 + duration_ms: 26.688 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-1.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-1.yaml new file mode 100644 index 000000000..90b12af43 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-1.yaml @@ -0,0 +1,224 @@ +--- +uuid: ca59a018-6e78-4e9f-9a3f-935a9355067e +task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-1 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Will the weather today be sunny or cloudy? + expect_response: + - sunny +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Will the weather today be sunny or cloudy? + context: + id: 01KHJQYPSZ4ZJ7MYDYVTYADW1N + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:31.200092+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:31.202493+00:00 + - role: user + content: Will the weather today be sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:31.200161+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c29064b0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:31.202503+00:00 + duration_ms: 34.868 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-2.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-2.yaml new file mode 100644 index 000000000..7a5c26c8b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-2.yaml @@ -0,0 +1,224 @@ +--- +uuid: c709fb58-be34-4777-9f7e-349efb633560 +task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-2 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Will the weather today be sunny or cloudy? + expect_response: + - sunny +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Will the weather today be sunny or cloudy? + context: + id: 01KHJQYQ0WSRXH7FW5XY4BS644 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:31.420732+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:31.423519+00:00 + - role: user + content: Will the weather today be sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:31.420799+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f47d5430>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:31.423529+00:00 + duration_ms: 32.605 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-3.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-3.yaml new file mode 100644 index 000000000..39424c947 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-3.yaml @@ -0,0 +1,224 @@ +--- +uuid: ea905aa5-55de-4fa5-90bc-430dfd010271 +task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-3 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Will the weather today be sunny or cloudy? + expect_response: + - sunny +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Will the weather today be sunny or cloudy? + context: + id: 01KHJQYQ9CN5XGHSKASJJJNE9J + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:31.692803+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:31.694393+00:00 + - role: user + content: Will the weather today be sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:31.692872+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c33f2090>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:31.694403+00:00 + duration_ms: 25.053 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-4.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-4.yaml new file mode 100644 index 000000000..239df8106 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-4.yaml @@ -0,0 +1,224 @@ +--- +uuid: bdec5ce3-a9a6-45fe-940f-aaf1461eb0fe +task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-4 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Will the weather today be sunny or cloudy? + expect_response: + - sunny +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Will the weather today be sunny or cloudy? + context: + id: 01KHJQYQF99V5Q4PY204DB38W2 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:31.881796+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:31.889744+00:00 + - role: user + content: Will the weather today be sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:31.881864+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c1b64040>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:31.889757+00:00 + duration_ms: 28.549 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-5.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-5.yaml new file mode 100644 index 000000000..1e3ffc52c --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-5.yaml @@ -0,0 +1,224 @@ +--- +uuid: 7cb5cb44-2a6a-49d4-8686-971c4e9ef0b3 +task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-5 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Will the weather today be sunny or cloudy? + expect_response: + - sunny +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Will the weather today be sunny or cloudy? + context: + id: 01KHJQYQP4FQGDXDVGR3C0WJ07 + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:32.100655+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:32.102602+00:00 + - role: user + content: Will the weather today be sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:32.100724+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c22750c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:32.102612+00:00 + duration_ms: 28.681 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-6.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-6.yaml new file mode 100644 index 000000000..cb7d90f5b --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-6.yaml @@ -0,0 +1,224 @@ +--- +uuid: a579d55d-66ff-4390-85cd-fa547b5ed31f +task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-6 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Will the weather today be sunny or cloudy? + expect_response: + - sunny +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Will the weather today be sunny or cloudy? + context: + id: 01KHJQYQX18DSY9J5YD158BPQV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:32.321282+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:32.323141+00:00 + - role: user + content: Will the weather today be sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:32.321355+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c3bcd850>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:32.323151+00:00 + duration_ms: 26.122 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-7.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-7.yaml new file mode 100644 index 000000000..a85e43648 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-7.yaml @@ -0,0 +1,224 @@ +--- +uuid: d718a242-0cdf-4b26-a626-7ec066d9d56b +task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-7 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Will the weather today be sunny or cloudy? + expect_response: + - sunny +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Will the weather today be sunny or cloudy? + context: + id: 01KHJQYR377K3A60BHEWJYMHXV + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:32.519815+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:32.526152+00:00 + - role: user + content: Will the weather today be sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:32.519883+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47f4c60f60>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:32.526163+00:00 + duration_ms: 34.271 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-8.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-8.yaml new file mode 100644 index 000000000..4712068ed --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-8.yaml @@ -0,0 +1,224 @@ +--- +uuid: 86ba6557-424e-4c4f-8efe-800d488ff9b3 +task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-8 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Will the weather today be sunny or cloudy? + expect_response: + - sunny +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Will the weather today be sunny or cloudy? + context: + id: 01KHJQYRAJHAD2M5E8A4CHTJPD + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:32.754505+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:32.761287+00:00 + - role: user + content: Will the weather today be sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:32.754574+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47d42f3ab0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:32.761298+00:00 + duration_ms: 182.98 + tries: 1 diff --git a/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-9.yaml b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-9.yaml new file mode 100644 index 000000000..3e8dbaec9 --- /dev/null +++ b/reports/questions/2026.2.2/gemma-3-27b-it/urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-9.yaml @@ -0,0 +1,224 @@ +--- +uuid: 1db27983-031e-4dc1-bbb4-79ac5db22fab +task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-9 +model_id: gemma-3-27b-it +category: +- weather +- question +task: + input_text: Will the weather today be sunny or cloudy? + expect_response: + - sunny +response: Error talking to API +context: + conversation_trace: + - event_type: async_process + data: + text: Will the weather today be sunny or cloudy? + context: + id: 01KHJQYRP08RFZQJBMGE0BE0KP + parent_id: null + user_id: null + conversation_id: null + device_id: null + satellite_id: null + language: en + agent_id: conversation.mock_title + extra_system_prompt: null + timestamp: 2026-02-16 08:09:33.120945+00:00 + - event_type: agent_detail + data: + messages: + - role: system + content: | + You are a voice assistant for Home Assistant. + Answer questions about the world truthfully. + Answer in plain text. Keep it simple and to the point. + When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain. + When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type. + This device is not able to start timers. + You ARE equipped to answer questions about the current state of + the home using the `GetLiveContext` tool. This is a primary function. Do not state you lack the + functionality if the question requires live data. + If the user asks about device existence/type (e.g., "Do I have lights in the bedroom?"): Answer + from the static context below. + If the user asks about the CURRENT state, value, or mode (e.g., "Is the lock locked?", + "Is the fan on?", "What mode is the thermostat in?", "What is the temperature outside?"): + 1. Recognize this requires live data. + 2. You MUST call `GetLiveContext`. This tool will provide the needed real-time information (like temperature from the local weather, lock status, etc.). + 3. Use the tool's response** to answer the user accurately (e.g., "The temperature outside is [value from tool]."). + For general knowledge questions not about the home: Answer truthfully from internal knowledge. + + Static Context: An overview of the areas and the devices in this smart home: + - names: Balcony Light + domain: light + areas: Balcony + - names: Bedroom Light + domain: light + areas: Bedroom + - names: Kitchen Light + domain: light + areas: Kitchen + - names: Living Room + domain: light + areas: Living Room + - names: Motion Sensor + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: binary_sensor + areas: Rooftop Terrace + - names: Motion Sensor Battery + domain: sensor + areas: Rooftop Terrace + - names: Samsung + domain: media_player + areas: Living Room + - names: Smart Plug + domain: switch + areas: Rooftop Terrace + - names: Smart Plug Energy + domain: sensor + areas: Rooftop Terrace + - names: Tasks + domain: todo + - names: Terrace Light + domain: light + areas: Rooftop Terrace + - names: Weather Service + domain: weather + areas: Rooftop Terrace + created: 2026-02-16 08:09:33.122586+00:00 + - role: user + content: Will the weather today be sunny or cloudy? + attachments: null + created: 2026-02-16 08:09:33.121016+00:00 + tools: + - name: HassMediaUnpause + description: Resumes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPause + description: Pauses a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaNext + description: Skips a media player to the next item + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassMediaPrevious + description: Replays the previous item for a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None)}' + - name: HassSetVolume + description: Sets the volume percentage of a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''volume_level'': All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, + max_included=True, msg=None), . + at 0x7f47c38b05c0>, msg=None)}' + - name: HassSetVolumeRelative + description: Increases or decreases the volume of a media player + parameters: '{''volume_step'': Any(''up'', ''down'', All(Coerce(int, msg=None), + Range(min=-100, max=100, min_included=True, max_included=True, msg=None), + at 0x7f47ecfd4ca0>, msg=None), + msg=None), ''name'': , ''area'': , ''floor'': }' + - name: HassMediaPlayerMute + description: Mutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaPlayerUnmute + description: Unmutes a media player + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [In([''media_player''])], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver''])], msg=None), + ''is_volume_muted'': }' + - name: HassMediaSearchAndPlay + description: Searches for media and plays the first result + parameters: '{''search_query'': , ''media_class'': + In([''album'', ''app'', ''artist'', ''channel'', ''composer'', ''contributing_artist'', + ''directory'', ''episode'', ''game'', ''genre'', ''image'', ''movie'', ''music'', + ''playlist'', ''podcast'', ''season'', ''track'', ''tv_show'', ''url'', + ''video'']), ''name'': , ''area'': , ''floor'': }' + - name: HassListAddItem + description: Add item to a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassListCompleteItem + description: Complete item on a todo list + parameters: '{''item'': , ''name'': + }' + - name: HassLightSet + description: Sets the brightness percentage or color of a light + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''color'': , ''temperature'': All(Coerce(int, msg=None), Range(min=0, + max=None, min_included=True, max_included=True, msg=None), msg=None), ''brightness'': + All(Coerce(int, msg=None), Range(min=0, max=100, min_included=True, max_included=True, + msg=None), msg=None)}' + - name: HassTurnOn + description: Turns on/opens/presses a device or entity. For locks, this performs + a 'lock' action. Use for requests like 'turn on', 'activate', 'enable', + or 'lock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassTurnOff + description: Turns off/closes a device or entity. For locks, this performs + an 'unlock' action. Use for requests like 'turn off', 'deactivate', 'disable', + or 'unlock'. + parameters: '{Any(''name'', ''area'', ''floor'', msg=None): , ''domain'': All(, + [], msg=None), ''device_class'': All(, [In([''tv'', ''speaker'', ''receiver'', + ''identify'', ''restart'', ''update'', ''water'', ''gas'', ''awning'', ''blind'', + ''curtain'', ''damper'', ''door'', ''garage'', ''gate'', ''shade'', ''shutter'', + ''window'', ''outlet'', ''switch''])], msg=None)}' + - name: HassCancelAllTimers + description: Cancels all timers + parameters: '{''area'': }' + - name: GetDateTime + description: Provides the current date and time. + parameters: '{}' + - name: calendar_get_events + description: Get events from a calendar. When asked if something happens, + search the whole week. Results are RFC 5545 which means 'end' is exclusive. + parameters: '{''calendar'': In([''Personal'']), ''range'': In([''today'', + ''week''])}' + - name: todo_get_items + description: Query a to-do list to find out what items are on it. Use this + to answer questions like 'What's on my task list?' or 'Read my grocery list'. + Filters items by status (needs_action, completed, all). + parameters: '{''todo_list'': In([''Tasks'']), ''status'': In([''needs_action'', + ''completed'', ''all''])}' + - name: GetLiveContext + description: 'Provides real-time information about the CURRENT state, value, + or mode of devices, sensors, entities, or areas. Use this tool for: 1. Answering + questions about current conditions (e.g., ''Is the light on?''). 2. As the + first step in conditional actions (e.g., ''If the weather is rainy, turn + off sprinklers'' requires checking the weather first).' + parameters: '{}' + timestamp: 2026-02-16 08:09:33.122595+00:00 + duration_ms: 33.618 + tries: 1 diff --git a/reports/questions/2026.2.2/report.csv b/reports/questions/2026.2.2/report.csv new file mode 100644 index 000000000..a7a874bb9 --- /dev/null +++ b/reports/questions/2026.2.2/report.csv @@ -0,0 +1,371 @@ +task_id,model_id,category,text,tool_call,response,task_name,label,details +"suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-0","gemma-3-27b-it","climate,question","If the temperature in the guest room is above 23 then set the target temperature to 22","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'climate.guest_room_thermostat': {'expected': {'temperature': 22.0}, 'got': {'temperature': 26}}}" +"suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-1","gemma-3-27b-it","climate,question","If the temperature in the guest room is above 23 then set the target temperature to 22","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'climate.guest_room_thermostat': {'expected': {'temperature': 22.0}, 'got': {'temperature': 26}}}" +"suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-2","gemma-3-27b-it","climate,question","If the temperature in the guest room is above 23 then set the target temperature to 22","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'climate.guest_room_thermostat': {'expected': {'temperature': 22.0}, 'got': {'temperature': 26}}}" +"suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-3","gemma-3-27b-it","climate,question","If the temperature in the guest room is above 23 then set the target temperature to 22","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'climate.guest_room_thermostat': {'expected': {'temperature': 22.0}, 'got': {'temperature': 26}}}" +"suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-4","gemma-3-27b-it","climate,question","If the temperature in the guest room is above 23 then set the target temperature to 22","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'climate.guest_room_thermostat': {'expected': {'temperature': 22.0}, 'got': {'temperature': 26}}}" +"suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-5","gemma-3-27b-it","climate,question","If the temperature in the guest room is above 23 then set the target temperature to 22","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'climate.guest_room_thermostat': {'expected': {'temperature': 22.0}, 'got': {'temperature': 26}}}" +"suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-6","gemma-3-27b-it","climate,question","If the temperature in the guest room is above 23 then set the target temperature to 22","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'climate.guest_room_thermostat': {'expected': {'temperature': 22.0}, 'got': {'temperature': 26}}}" +"suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-7","gemma-3-27b-it","climate,question","If the temperature in the guest room is above 23 then set the target temperature to 22","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'climate.guest_room_thermostat': {'expected': {'temperature': 22.0}, 'got': {'temperature': 26}}}" +"suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-8","gemma-3-27b-it","climate,question","If the temperature in the guest room is above 23 then set the target temperature to 22","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'climate.guest_room_thermostat': {'expected': {'temperature': 22.0}, 'got': {'temperature': 26}}}" +"suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-9","gemma-3-27b-it","climate,question","If the temperature in the guest room is above 23 then set the target temperature to 22","","Error talking to API","eval-test_expected_states","Bad","AssertionError: assert not {'climate.guest_room_thermostat': {'expected': {'temperature': 22.0}, 'got': {'temperature': 26}}}" +"suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-0","gemma-3-27b-it","climate,question","Set the target temperature to 22 in the guest room if it is above 23","","Error talking to API","eval-test_expected_states","Good","" +"suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-1","gemma-3-27b-it","climate,question","Set the target temperature to 22 in the guest room if it is above 23","","Error talking to API","eval-test_expected_states","Good","" +"suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-2","gemma-3-27b-it","climate,question","Set the target temperature to 22 in the guest room if it is above 23","","Error talking to API","eval-test_expected_states","Good","" +"suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-3","gemma-3-27b-it","climate,question","Set the target temperature to 22 in the guest room if it is above 23","","Error talking to API","eval-test_expected_states","Good","" +"suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-4","gemma-3-27b-it","climate,question","Set the target temperature to 22 in the guest room if it is above 23","","Error talking to API","eval-test_expected_states","Good","" +"suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-5","gemma-3-27b-it","climate,question","Set the target temperature to 22 in the guest room if it is above 23","","Error talking to API","eval-test_expected_states","Good","" +"suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-6","gemma-3-27b-it","climate,question","Set the target temperature to 22 in the guest room if it is above 23","","Error talking to API","eval-test_expected_states","Good","" +"suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-7","gemma-3-27b-it","climate,question","Set the target temperature to 22 in the guest room if it is above 23","","Error talking to API","eval-test_expected_states","Good","" +"suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-8","gemma-3-27b-it","climate,question","Set the target temperature to 22 in the guest room if it is above 23","","Error talking to API","eval-test_expected_states","Good","" +"suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-9","gemma-3-27b-it","climate,question","Set the target temperature to 22 in the guest room if it is above 23","","Error talking to API","eval-test_expected_states","Good","" +"suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-0","gemma-3-27b-it","sensor,question","Is the kitchen sink battery low?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', '95']" +"suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-1","gemma-3-27b-it","sensor,question","Is the kitchen sink battery low?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', '95']" +"suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-2","gemma-3-27b-it","sensor,question","Is the kitchen sink battery low?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', '95']" +"suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-3","gemma-3-27b-it","sensor,question","Is the kitchen sink battery low?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', '95']" +"suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-4","gemma-3-27b-it","sensor,question","Is the kitchen sink battery low?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', '95']" +"suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-5","gemma-3-27b-it","sensor,question","Is the kitchen sink battery low?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', '95']" +"suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-6","gemma-3-27b-it","sensor,question","Is the kitchen sink battery low?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', '95']" +"suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-7","gemma-3-27b-it","sensor,question","Is the kitchen sink battery low?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', '95']" +"suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-8","gemma-3-27b-it","sensor,question","Is the kitchen sink battery low?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', '95']" +"suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-9","gemma-3-27b-it","sensor,question","Is the kitchen sink battery low?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', '95']" +"suburban_familiy_home_be_switch-is_the_kitchen_plug_on-0","gemma-3-27b-it","switch,question","Is the kitchen plug on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"suburban_familiy_home_be_switch-is_the_kitchen_plug_on-1","gemma-3-27b-it","switch,question","Is the kitchen plug on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"suburban_familiy_home_be_switch-is_the_kitchen_plug_on-2","gemma-3-27b-it","switch,question","Is the kitchen plug on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"suburban_familiy_home_be_switch-is_the_kitchen_plug_on-3","gemma-3-27b-it","switch,question","Is the kitchen plug on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"suburban_familiy_home_be_switch-is_the_kitchen_plug_on-4","gemma-3-27b-it","switch,question","Is the kitchen plug on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"suburban_familiy_home_be_switch-is_the_kitchen_plug_on-5","gemma-3-27b-it","switch,question","Is the kitchen plug on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"suburban_familiy_home_be_switch-is_the_kitchen_plug_on-6","gemma-3-27b-it","switch,question","Is the kitchen plug on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"suburban_familiy_home_be_switch-is_the_kitchen_plug_on-7","gemma-3-27b-it","switch,question","Is the kitchen plug on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"suburban_familiy_home_be_switch-is_the_kitchen_plug_on-8","gemma-3-27b-it","switch,question","Is the kitchen plug on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"suburban_familiy_home_be_switch-is_the_kitchen_plug_on-9","gemma-3-27b-it","switch,question","Is the kitchen plug on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-0","gemma-3-27b-it","switch,question","Is the plug in the kitchen on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-1","gemma-3-27b-it","switch,question","Is the plug in the kitchen on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-2","gemma-3-27b-it","switch,question","Is the plug in the kitchen on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-3","gemma-3-27b-it","switch,question","Is the plug in the kitchen on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-4","gemma-3-27b-it","switch,question","Is the plug in the kitchen on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-5","gemma-3-27b-it","switch,question","Is the plug in the kitchen on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-6","gemma-3-27b-it","switch,question","Is the plug in the kitchen on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-7","gemma-3-27b-it","switch,question","Is the plug in the kitchen on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-8","gemma-3-27b-it","switch,question","Is the plug in the kitchen on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-9","gemma-3-27b-it","switch,question","Is the plug in the kitchen on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"suburban_familiy_home_be_valve-are_the_sprinklers_on-0","gemma-3-27b-it","valve,question","Are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on', 'open']" +"suburban_familiy_home_be_valve-are_the_sprinklers_on-1","gemma-3-27b-it","valve,question","Are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on', 'open']" +"suburban_familiy_home_be_valve-are_the_sprinklers_on-2","gemma-3-27b-it","valve,question","Are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on', 'open']" +"suburban_familiy_home_be_valve-are_the_sprinklers_on-3","gemma-3-27b-it","valve,question","Are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on', 'open']" +"suburban_familiy_home_be_valve-are_the_sprinklers_on-4","gemma-3-27b-it","valve,question","Are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on', 'open']" +"suburban_familiy_home_be_valve-are_the_sprinklers_on-5","gemma-3-27b-it","valve,question","Are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on', 'open']" +"suburban_familiy_home_be_valve-are_the_sprinklers_on-6","gemma-3-27b-it","valve,question","Are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on', 'open']" +"suburban_familiy_home_be_valve-are_the_sprinklers_on-7","gemma-3-27b-it","valve,question","Are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on', 'open']" +"suburban_familiy_home_be_valve-are_the_sprinklers_on-8","gemma-3-27b-it","valve,question","Are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on', 'open']" +"suburban_familiy_home_be_valve-are_the_sprinklers_on-9","gemma-3-27b-it","valve,question","Are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on', 'open']" +"suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-0","gemma-3-27b-it","valve,question","Please tell me, are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off', 'closed']" +"suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-1","gemma-3-27b-it","valve,question","Please tell me, are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off', 'closed']" +"suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-2","gemma-3-27b-it","valve,question","Please tell me, are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off', 'closed']" +"suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-3","gemma-3-27b-it","valve,question","Please tell me, are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off', 'closed']" +"suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-4","gemma-3-27b-it","valve,question","Please tell me, are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off', 'closed']" +"suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-5","gemma-3-27b-it","valve,question","Please tell me, are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off', 'closed']" +"suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-6","gemma-3-27b-it","valve,question","Please tell me, are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off', 'closed']" +"suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-7","gemma-3-27b-it","valve,question","Please tell me, are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off', 'closed']" +"suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-8","gemma-3-27b-it","valve,question","Please tell me, are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off', 'closed']" +"suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-9","gemma-3-27b-it","valve,question","Please tell me, are the sprinklers on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off', 'closed']" +"urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-0","gemma-3-27b-it","calendar,question","According to my calendar, is anyone visiting?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-1","gemma-3-27b-it","calendar,question","According to my calendar, is anyone visiting?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-2","gemma-3-27b-it","calendar,question","According to my calendar, is anyone visiting?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-3","gemma-3-27b-it","calendar,question","According to my calendar, is anyone visiting?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-4","gemma-3-27b-it","calendar,question","According to my calendar, is anyone visiting?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-5","gemma-3-27b-it","calendar,question","According to my calendar, is anyone visiting?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-6","gemma-3-27b-it","calendar,question","According to my calendar, is anyone visiting?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-7","gemma-3-27b-it","calendar,question","According to my calendar, is anyone visiting?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-8","gemma-3-27b-it","calendar,question","According to my calendar, is anyone visiting?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-9","gemma-3-27b-it","calendar,question","According to my calendar, is anyone visiting?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-0","gemma-3-27b-it","calendar,question","According to my personal calendar, are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-1","gemma-3-27b-it","calendar,question","According to my personal calendar, are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-2","gemma-3-27b-it","calendar,question","According to my personal calendar, are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-3","gemma-3-27b-it","calendar,question","According to my personal calendar, are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-4","gemma-3-27b-it","calendar,question","According to my personal calendar, are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-5","gemma-3-27b-it","calendar,question","According to my personal calendar, are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-6","gemma-3-27b-it","calendar,question","According to my personal calendar, are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-7","gemma-3-27b-it","calendar,question","According to my personal calendar, are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-8","gemma-3-27b-it","calendar,question","According to my personal calendar, are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-9","gemma-3-27b-it","calendar,question","According to my personal calendar, are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-0","gemma-3-27b-it","calendar,question","According to my personal calendar, do I have any events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-1","gemma-3-27b-it","calendar,question","According to my personal calendar, do I have any events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-2","gemma-3-27b-it","calendar,question","According to my personal calendar, do I have any events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-3","gemma-3-27b-it","calendar,question","According to my personal calendar, do I have any events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-4","gemma-3-27b-it","calendar,question","According to my personal calendar, do I have any events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-5","gemma-3-27b-it","calendar,question","According to my personal calendar, do I have any events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-6","gemma-3-27b-it","calendar,question","According to my personal calendar, do I have any events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-7","gemma-3-27b-it","calendar,question","According to my personal calendar, do I have any events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-8","gemma-3-27b-it","calendar,question","According to my personal calendar, do I have any events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-9","gemma-3-27b-it","calendar,question","According to my personal calendar, do I have any events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-0","gemma-3-27b-it","calendar,question","According to my personal calendar, who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-1","gemma-3-27b-it","calendar,question","According to my personal calendar, who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-2","gemma-3-27b-it","calendar,question","According to my personal calendar, who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-3","gemma-3-27b-it","calendar,question","According to my personal calendar, who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-4","gemma-3-27b-it","calendar,question","According to my personal calendar, who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-5","gemma-3-27b-it","calendar,question","According to my personal calendar, who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-6","gemma-3-27b-it","calendar,question","According to my personal calendar, who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-7","gemma-3-27b-it","calendar,question","According to my personal calendar, who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-8","gemma-3-27b-it","calendar,question","According to my personal calendar, who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-9","gemma-3-27b-it","calendar,question","According to my personal calendar, who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-am_i_leaving_the_house_today-0","gemma-3-27b-it","calendar,question","Am I leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-am_i_leaving_the_house_today-1","gemma-3-27b-it","calendar,question","Am I leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-am_i_leaving_the_house_today-2","gemma-3-27b-it","calendar,question","Am I leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-am_i_leaving_the_house_today-3","gemma-3-27b-it","calendar,question","Am I leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-am_i_leaving_the_house_today-4","gemma-3-27b-it","calendar,question","Am I leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-am_i_leaving_the_house_today-5","gemma-3-27b-it","calendar,question","Am I leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-am_i_leaving_the_house_today-6","gemma-3-27b-it","calendar,question","Am I leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-am_i_leaving_the_house_today-7","gemma-3-27b-it","calendar,question","Am I leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-am_i_leaving_the_house_today-8","gemma-3-27b-it","calendar,question","Am I leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-am_i_leaving_the_house_today-9","gemma-3-27b-it","calendar,question","Am I leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-are_we_leaving_the_house_today-0","gemma-3-27b-it","calendar,question","Are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-are_we_leaving_the_house_today-1","gemma-3-27b-it","calendar,question","Are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-are_we_leaving_the_house_today-2","gemma-3-27b-it","calendar,question","Are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-are_we_leaving_the_house_today-3","gemma-3-27b-it","calendar,question","Are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-are_we_leaving_the_house_today-4","gemma-3-27b-it","calendar,question","Are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-are_we_leaving_the_house_today-5","gemma-3-27b-it","calendar,question","Are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-are_we_leaving_the_house_today-6","gemma-3-27b-it","calendar,question","Are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-are_we_leaving_the_house_today-7","gemma-3-27b-it","calendar,question","Are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-are_we_leaving_the_house_today-8","gemma-3-27b-it","calendar,question","Are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-are_we_leaving_the_house_today-9","gemma-3-27b-it","calendar,question","Are we leaving the house today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'nothing', 'not leaving', 'may not be leaving', 'do not have any events', 'don't have any events', 'do not see', 'don't see', 'does not seem', 'doesn't seem', 'at home', 'no plans']" +"urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-0","gemma-3-27b-it","calendar,question","Do i have any personal calendar events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-1","gemma-3-27b-it","calendar,question","Do i have any personal calendar events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-2","gemma-3-27b-it","calendar,question","Do i have any personal calendar events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-3","gemma-3-27b-it","calendar,question","Do i have any personal calendar events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-4","gemma-3-27b-it","calendar,question","Do i have any personal calendar events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-5","gemma-3-27b-it","calendar,question","Do i have any personal calendar events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-6","gemma-3-27b-it","calendar,question","Do i have any personal calendar events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-7","gemma-3-27b-it","calendar,question","Do i have any personal calendar events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-8","gemma-3-27b-it","calendar,question","Do i have any personal calendar events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-9","gemma-3-27b-it","calendar,question","Do i have any personal calendar events away from home today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'dinner', 'school', 'class']" +"urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-0","gemma-3-27b-it","calendar,question","From my personal calendar, how many nights do I need to cook this week?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-1","gemma-3-27b-it","calendar,question","From my personal calendar, how many nights do I need to cook this week?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-2","gemma-3-27b-it","calendar,question","From my personal calendar, how many nights do I need to cook this week?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-3","gemma-3-27b-it","calendar,question","From my personal calendar, how many nights do I need to cook this week?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-4","gemma-3-27b-it","calendar,question","From my personal calendar, how many nights do I need to cook this week?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-5","gemma-3-27b-it","calendar,question","From my personal calendar, how many nights do I need to cook this week?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-6","gemma-3-27b-it","calendar,question","From my personal calendar, how many nights do I need to cook this week?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-7","gemma-3-27b-it","calendar,question","From my personal calendar, how many nights do I need to cook this week?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-8","gemma-3-27b-it","calendar,question","From my personal calendar, how many nights do I need to cook this week?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-9","gemma-3-27b-it","calendar,question","From my personal calendar, how many nights do I need to cook this week?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-0","gemma-3-27b-it","calendar,question","How many nights this week do I need to cook?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-1","gemma-3-27b-it","calendar,question","How many nights this week do I need to cook?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-2","gemma-3-27b-it","calendar,question","How many nights this week do I need to cook?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-3","gemma-3-27b-it","calendar,question","How many nights this week do I need to cook?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-4","gemma-3-27b-it","calendar,question","How many nights this week do I need to cook?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-5","gemma-3-27b-it","calendar,question","How many nights this week do I need to cook?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-6","gemma-3-27b-it","calendar,question","How many nights this week do I need to cook?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-7","gemma-3-27b-it","calendar,question","How many nights this week do I need to cook?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-8","gemma-3-27b-it","calendar,question","How many nights this week do I need to cook?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-9","gemma-3-27b-it","calendar,question","How many nights this week do I need to cook?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['3', 'three']" +"urban_loft_au_calendar-is_anyone_coming_to_visit-0","gemma-3-27b-it","calendar,question","Is anyone coming to visit?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-is_anyone_coming_to_visit-1","gemma-3-27b-it","calendar,question","Is anyone coming to visit?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-is_anyone_coming_to_visit-2","gemma-3-27b-it","calendar,question","Is anyone coming to visit?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-is_anyone_coming_to_visit-3","gemma-3-27b-it","calendar,question","Is anyone coming to visit?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-is_anyone_coming_to_visit-4","gemma-3-27b-it","calendar,question","Is anyone coming to visit?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-is_anyone_coming_to_visit-5","gemma-3-27b-it","calendar,question","Is anyone coming to visit?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-is_anyone_coming_to_visit-6","gemma-3-27b-it","calendar,question","Is anyone coming to visit?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-is_anyone_coming_to_visit-7","gemma-3-27b-it","calendar,question","Is anyone coming to visit?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-is_anyone_coming_to_visit-8","gemma-3-27b-it","calendar,question","Is anyone coming to visit?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-is_anyone_coming_to_visit-9","gemma-3-27b-it","calendar,question","Is anyone coming to visit?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['liza']" +"urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-0","gemma-3-27b-it","calendar,question","What classes are on my personal calendar today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-1","gemma-3-27b-it","calendar,question","What classes are on my personal calendar today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-2","gemma-3-27b-it","calendar,question","What classes are on my personal calendar today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-3","gemma-3-27b-it","calendar,question","What classes are on my personal calendar today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-4","gemma-3-27b-it","calendar,question","What classes are on my personal calendar today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-5","gemma-3-27b-it","calendar,question","What classes are on my personal calendar today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-6","gemma-3-27b-it","calendar,question","What classes are on my personal calendar today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-7","gemma-3-27b-it","calendar,question","What classes are on my personal calendar today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-8","gemma-3-27b-it","calendar,question","What classes are on my personal calendar today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-9","gemma-3-27b-it","calendar,question","What classes are on my personal calendar today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_do_i_have_today-0","gemma-3-27b-it","calendar,question","What classes do I have today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_do_i_have_today-1","gemma-3-27b-it","calendar,question","What classes do I have today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_do_i_have_today-2","gemma-3-27b-it","calendar,question","What classes do I have today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_do_i_have_today-3","gemma-3-27b-it","calendar,question","What classes do I have today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_do_i_have_today-4","gemma-3-27b-it","calendar,question","What classes do I have today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_do_i_have_today-5","gemma-3-27b-it","calendar,question","What classes do I have today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_do_i_have_today-6","gemma-3-27b-it","calendar,question","What classes do I have today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_do_i_have_today-7","gemma-3-27b-it","calendar,question","What classes do I have today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_do_i_have_today-8","gemma-3-27b-it","calendar,question","What classes do I have today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-what_classes_do_i_have_today-9","gemma-3-27b-it","calendar,question","What classes do I have today?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['chemistry']" +"urban_loft_au_calendar-who_am_i_meeting_for_dinner-0","gemma-3-27b-it","calendar,question","Who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-who_am_i_meeting_for_dinner-1","gemma-3-27b-it","calendar,question","Who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-who_am_i_meeting_for_dinner-2","gemma-3-27b-it","calendar,question","Who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-who_am_i_meeting_for_dinner-3","gemma-3-27b-it","calendar,question","Who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-who_am_i_meeting_for_dinner-4","gemma-3-27b-it","calendar,question","Who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-who_am_i_meeting_for_dinner-5","gemma-3-27b-it","calendar,question","Who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-who_am_i_meeting_for_dinner-6","gemma-3-27b-it","calendar,question","Who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-who_am_i_meeting_for_dinner-7","gemma-3-27b-it","calendar,question","Who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-who_am_i_meeting_for_dinner-8","gemma-3-27b-it","calendar,question","Who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_calendar-who_am_i_meeting_for_dinner-9","gemma-3-27b-it","calendar,question","Who am I meeting for dinner?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_light-how_many_lights_are_currently_on-0","gemma-3-27b-it","light,question","How many lights are currently on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2', 'two', 'bedroom', 'balcony']" +"urban_loft_au_light-how_many_lights_are_currently_on-1","gemma-3-27b-it","light,question","How many lights are currently on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2', 'two', 'bedroom', 'balcony']" +"urban_loft_au_light-how_many_lights_are_currently_on-2","gemma-3-27b-it","light,question","How many lights are currently on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2', 'two', 'bedroom', 'balcony']" +"urban_loft_au_light-how_many_lights_are_currently_on-3","gemma-3-27b-it","light,question","How many lights are currently on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2', 'two', 'bedroom', 'balcony']" +"urban_loft_au_light-how_many_lights_are_currently_on-4","gemma-3-27b-it","light,question","How many lights are currently on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2', 'two', 'bedroom', 'balcony']" +"urban_loft_au_light-how_many_lights_are_currently_on-5","gemma-3-27b-it","light,question","How many lights are currently on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2', 'two', 'bedroom', 'balcony']" +"urban_loft_au_light-how_many_lights_are_currently_on-6","gemma-3-27b-it","light,question","How many lights are currently on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2', 'two', 'bedroom', 'balcony']" +"urban_loft_au_light-how_many_lights_are_currently_on-7","gemma-3-27b-it","light,question","How many lights are currently on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2', 'two', 'bedroom', 'balcony']" +"urban_loft_au_light-how_many_lights_are_currently_on-8","gemma-3-27b-it","light,question","How many lights are currently on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2', 'two', 'bedroom', 'balcony']" +"urban_loft_au_light-how_many_lights_are_currently_on-9","gemma-3-27b-it","light,question","How many lights are currently on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2', 'two', 'bedroom', 'balcony']" +"urban_loft_au_light-is_the_bedroom_light_off-0","gemma-3-27b-it","light,question","Is the bedroom light off?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'on']" +"urban_loft_au_light-is_the_bedroom_light_off-1","gemma-3-27b-it","light,question","Is the bedroom light off?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'on']" +"urban_loft_au_light-is_the_bedroom_light_off-2","gemma-3-27b-it","light,question","Is the bedroom light off?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'on']" +"urban_loft_au_light-is_the_bedroom_light_off-3","gemma-3-27b-it","light,question","Is the bedroom light off?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'on']" +"urban_loft_au_light-is_the_bedroom_light_off-4","gemma-3-27b-it","light,question","Is the bedroom light off?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'on']" +"urban_loft_au_light-is_the_bedroom_light_off-5","gemma-3-27b-it","light,question","Is the bedroom light off?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'on']" +"urban_loft_au_light-is_the_bedroom_light_off-6","gemma-3-27b-it","light,question","Is the bedroom light off?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'on']" +"urban_loft_au_light-is_the_bedroom_light_off-7","gemma-3-27b-it","light,question","Is the bedroom light off?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'on']" +"urban_loft_au_light-is_the_bedroom_light_off-8","gemma-3-27b-it","light,question","Is the bedroom light off?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'on']" +"urban_loft_au_light-is_the_bedroom_light_off-9","gemma-3-27b-it","light,question","Is the bedroom light off?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'on']" +"urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-0","gemma-3-27b-it","light,question","Is the bedroom light off? Please answer with 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'off']" +"urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-1","gemma-3-27b-it","light,question","Is the bedroom light off? Please answer with 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'off']" +"urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-2","gemma-3-27b-it","light,question","Is the bedroom light off? Please answer with 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'off']" +"urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-3","gemma-3-27b-it","light,question","Is the bedroom light off? Please answer with 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'off']" +"urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-4","gemma-3-27b-it","light,question","Is the bedroom light off? Please answer with 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'off']" +"urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-5","gemma-3-27b-it","light,question","Is the bedroom light off? Please answer with 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'off']" +"urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-6","gemma-3-27b-it","light,question","Is the bedroom light off? Please answer with 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'off']" +"urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-7","gemma-3-27b-it","light,question","Is the bedroom light off? Please answer with 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'off']" +"urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-8","gemma-3-27b-it","light,question","Is the bedroom light off? Please answer with 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'off']" +"urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-9","gemma-3-27b-it","light,question","Is the bedroom light off? Please answer with 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'off']" +"urban_loft_au_light-is_the_bedroom_light_on-0","gemma-3-27b-it","light,question","Is the bedroom light on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"urban_loft_au_light-is_the_bedroom_light_on-1","gemma-3-27b-it","light,question","Is the bedroom light on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"urban_loft_au_light-is_the_bedroom_light_on-2","gemma-3-27b-it","light,question","Is the bedroom light on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"urban_loft_au_light-is_the_bedroom_light_on-3","gemma-3-27b-it","light,question","Is the bedroom light on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"urban_loft_au_light-is_the_bedroom_light_on-4","gemma-3-27b-it","light,question","Is the bedroom light on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"urban_loft_au_light-is_the_bedroom_light_on-5","gemma-3-27b-it","light,question","Is the bedroom light on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"urban_loft_au_light-is_the_bedroom_light_on-6","gemma-3-27b-it","light,question","Is the bedroom light on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"urban_loft_au_light-is_the_bedroom_light_on-7","gemma-3-27b-it","light,question","Is the bedroom light on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"urban_loft_au_light-is_the_bedroom_light_on-8","gemma-3-27b-it","light,question","Is the bedroom light on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"urban_loft_au_light-is_the_bedroom_light_on-9","gemma-3-27b-it","light,question","Is the bedroom light on?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['yes', 'on']" +"urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-0","gemma-3-27b-it","light,question","Is the bedroom light on? Please answer 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-1","gemma-3-27b-it","light,question","Is the bedroom light on? Please answer 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-2","gemma-3-27b-it","light,question","Is the bedroom light on? Please answer 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-3","gemma-3-27b-it","light,question","Is the bedroom light on? Please answer 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-4","gemma-3-27b-it","light,question","Is the bedroom light on? Please answer 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-5","gemma-3-27b-it","light,question","Is the bedroom light on? Please answer 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-6","gemma-3-27b-it","light,question","Is the bedroom light on? Please answer 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-7","gemma-3-27b-it","light,question","Is the bedroom light on? Please answer 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-8","gemma-3-27b-it","light,question","Is the bedroom light on? Please answer 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-9","gemma-3-27b-it","light,question","Is the bedroom light on? Please answer 'yes' or 'no'.","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['no', 'off']" +"urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-0","gemma-3-27b-it","media-player,question","Is the living room media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['paused']" +"urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-1","gemma-3-27b-it","media-player,question","Is the living room media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['paused']" +"urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-2","gemma-3-27b-it","media-player,question","Is the living room media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['paused']" +"urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-3","gemma-3-27b-it","media-player,question","Is the living room media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['paused']" +"urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-4","gemma-3-27b-it","media-player,question","Is the living room media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['paused']" +"urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-5","gemma-3-27b-it","media-player,question","Is the living room media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['paused']" +"urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-6","gemma-3-27b-it","media-player,question","Is the living room media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['paused']" +"urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-7","gemma-3-27b-it","media-player,question","Is the living room media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['paused']" +"urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-8","gemma-3-27b-it","media-player,question","Is the living room media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['paused']" +"urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-9","gemma-3-27b-it","media-player,question","Is the living room media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['paused']" +"urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-0","gemma-3-27b-it","media-player,question","Is the media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['playing']" +"urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-1","gemma-3-27b-it","media-player,question","Is the media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['playing']" +"urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-2","gemma-3-27b-it","media-player,question","Is the media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['playing']" +"urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-3","gemma-3-27b-it","media-player,question","Is the media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['playing']" +"urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-4","gemma-3-27b-it","media-player,question","Is the media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['playing']" +"urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-5","gemma-3-27b-it","media-player,question","Is the media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['playing']" +"urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-6","gemma-3-27b-it","media-player,question","Is the media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['playing']" +"urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-7","gemma-3-27b-it","media-player,question","Is the media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['playing']" +"urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-8","gemma-3-27b-it","media-player,question","Is the media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['playing']" +"urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-9","gemma-3-27b-it","media-player,question","Is the media player playing, paused, or stopped?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['playing']" +"urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-0","gemma-3-27b-it","sensor,question","What is the battery level of the motion sensor?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['55%', '55 percent']" +"urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-1","gemma-3-27b-it","sensor,question","What is the battery level of the motion sensor?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['55%', '55 percent']" +"urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-2","gemma-3-27b-it","sensor,question","What is the battery level of the motion sensor?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['55%', '55 percent']" +"urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-3","gemma-3-27b-it","sensor,question","What is the battery level of the motion sensor?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['55%', '55 percent']" +"urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-4","gemma-3-27b-it","sensor,question","What is the battery level of the motion sensor?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['55%', '55 percent']" +"urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-5","gemma-3-27b-it","sensor,question","What is the battery level of the motion sensor?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['55%', '55 percent']" +"urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-6","gemma-3-27b-it","sensor,question","What is the battery level of the motion sensor?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['55%', '55 percent']" +"urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-7","gemma-3-27b-it","sensor,question","What is the battery level of the motion sensor?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['55%', '55 percent']" +"urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-8","gemma-3-27b-it","sensor,question","What is the battery level of the motion sensor?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['55%', '55 percent']" +"urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-9","gemma-3-27b-it","sensor,question","What is the battery level of the motion sensor?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['55%', '55 percent']" +"urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-0","gemma-3-27b-it","sensor,question","What is the motion sensor battery level?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['13%', '13 percent']" +"urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-1","gemma-3-27b-it","sensor,question","What is the motion sensor battery level?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['13%', '13 percent']" +"urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-2","gemma-3-27b-it","sensor,question","What is the motion sensor battery level?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['13%', '13 percent']" +"urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-3","gemma-3-27b-it","sensor,question","What is the motion sensor battery level?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['13%', '13 percent']" +"urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-4","gemma-3-27b-it","sensor,question","What is the motion sensor battery level?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['13%', '13 percent']" +"urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-5","gemma-3-27b-it","sensor,question","What is the motion sensor battery level?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['13%', '13 percent']" +"urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-6","gemma-3-27b-it","sensor,question","What is the motion sensor battery level?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['13%', '13 percent']" +"urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-7","gemma-3-27b-it","sensor,question","What is the motion sensor battery level?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['13%', '13 percent']" +"urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-8","gemma-3-27b-it","sensor,question","What is the motion sensor battery level?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['13%', '13 percent']" +"urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-9","gemma-3-27b-it","sensor,question","What is the motion sensor battery level?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['13%', '13 percent']" +"urban_loft_au_todo-how_many_items_are_on_my_task_list-0","gemma-3-27b-it","todo,question","How many items are on my task list?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2']" +"urban_loft_au_todo-how_many_items_are_on_my_task_list-1","gemma-3-27b-it","todo,question","How many items are on my task list?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2']" +"urban_loft_au_todo-how_many_items_are_on_my_task_list-2","gemma-3-27b-it","todo,question","How many items are on my task list?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2']" +"urban_loft_au_todo-how_many_items_are_on_my_task_list-3","gemma-3-27b-it","todo,question","How many items are on my task list?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2']" +"urban_loft_au_todo-how_many_items_are_on_my_task_list-4","gemma-3-27b-it","todo,question","How many items are on my task list?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2']" +"urban_loft_au_todo-how_many_items_are_on_my_task_list-5","gemma-3-27b-it","todo,question","How many items are on my task list?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2']" +"urban_loft_au_todo-how_many_items_are_on_my_task_list-6","gemma-3-27b-it","todo,question","How many items are on my task list?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2']" +"urban_loft_au_todo-how_many_items_are_on_my_task_list-7","gemma-3-27b-it","todo,question","How many items are on my task list?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2']" +"urban_loft_au_todo-how_many_items_are_on_my_task_list-8","gemma-3-27b-it","todo,question","How many items are on my task list?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2']" +"urban_loft_au_todo-how_many_items_are_on_my_task_list-9","gemma-3-27b-it","todo,question","How many items are on my task list?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['2']" +"urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-0","gemma-3-27b-it","todo,question","What chores around the house do I need to complete?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['terrace light']" +"urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-1","gemma-3-27b-it","todo,question","What chores around the house do I need to complete?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['terrace light']" +"urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-2","gemma-3-27b-it","todo,question","What chores around the house do I need to complete?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['terrace light']" +"urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-3","gemma-3-27b-it","todo,question","What chores around the house do I need to complete?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['terrace light']" +"urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-4","gemma-3-27b-it","todo,question","What chores around the house do I need to complete?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['terrace light']" +"urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-5","gemma-3-27b-it","todo,question","What chores around the house do I need to complete?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['terrace light']" +"urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-6","gemma-3-27b-it","todo,question","What chores around the house do I need to complete?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['terrace light']" +"urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-7","gemma-3-27b-it","todo,question","What chores around the house do I need to complete?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['terrace light']" +"urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-8","gemma-3-27b-it","todo,question","What chores around the house do I need to complete?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['terrace light']" +"urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-9","gemma-3-27b-it","todo,question","What chores around the house do I need to complete?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['terrace light']" +"urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-0","gemma-3-27b-it","todo,question","What's is on my task list to buy at the grocery store?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['salad']" +"urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-1","gemma-3-27b-it","todo,question","What's is on my task list to buy at the grocery store?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['salad']" +"urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-2","gemma-3-27b-it","todo,question","What's is on my task list to buy at the grocery store?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['salad']" +"urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-3","gemma-3-27b-it","todo,question","What's is on my task list to buy at the grocery store?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['salad']" +"urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-4","gemma-3-27b-it","todo,question","What's is on my task list to buy at the grocery store?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['salad']" +"urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-5","gemma-3-27b-it","todo,question","What's is on my task list to buy at the grocery store?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['salad']" +"urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-6","gemma-3-27b-it","todo,question","What's is on my task list to buy at the grocery store?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['salad']" +"urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-7","gemma-3-27b-it","todo,question","What's is on my task list to buy at the grocery store?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['salad']" +"urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-8","gemma-3-27b-it","todo,question","What's is on my task list to buy at the grocery store?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['salad']" +"urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-9","gemma-3-27b-it","todo,question","What's is on my task list to buy at the grocery store?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['salad']" +"urban_loft_au_todo-who_do_i_need_to_call-0","gemma-3-27b-it","todo,question","Who do i need to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_do_i_need_to_call-1","gemma-3-27b-it","todo,question","Who do i need to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_do_i_need_to_call-2","gemma-3-27b-it","todo,question","Who do i need to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_do_i_need_to_call-3","gemma-3-27b-it","todo,question","Who do i need to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_do_i_need_to_call-4","gemma-3-27b-it","todo,question","Who do i need to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_do_i_need_to_call-5","gemma-3-27b-it","todo,question","Who do i need to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_do_i_need_to_call-6","gemma-3-27b-it","todo,question","Who do i need to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_do_i_need_to_call-7","gemma-3-27b-it","todo,question","Who do i need to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_do_i_need_to_call-8","gemma-3-27b-it","todo,question","Who do i need to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_do_i_need_to_call-9","gemma-3-27b-it","todo,question","Who do i need to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_is_on_my_task_list_to_call-0","gemma-3-27b-it","todo,question","Who is on my task list to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_is_on_my_task_list_to_call-1","gemma-3-27b-it","todo,question","Who is on my task list to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_is_on_my_task_list_to_call-2","gemma-3-27b-it","todo,question","Who is on my task list to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_is_on_my_task_list_to_call-3","gemma-3-27b-it","todo,question","Who is on my task list to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_is_on_my_task_list_to_call-4","gemma-3-27b-it","todo,question","Who is on my task list to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_is_on_my_task_list_to_call-5","gemma-3-27b-it","todo,question","Who is on my task list to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_is_on_my_task_list_to_call-6","gemma-3-27b-it","todo,question","Who is on my task list to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_is_on_my_task_list_to_call-7","gemma-3-27b-it","todo,question","Who is on my task list to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_is_on_my_task_list_to_call-8","gemma-3-27b-it","todo,question","Who is on my task list to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_todo-who_is_on_my_task_list_to_call-9","gemma-3-27b-it","todo,question","Who is on my task list to call?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['Liza']" +"urban_loft_au_weather-is_today_sunny_or_cloudy-0","gemma-3-27b-it","weather,question","Is today sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['cloudy']" +"urban_loft_au_weather-is_today_sunny_or_cloudy-1","gemma-3-27b-it","weather,question","Is today sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['cloudy']" +"urban_loft_au_weather-is_today_sunny_or_cloudy-2","gemma-3-27b-it","weather,question","Is today sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['cloudy']" +"urban_loft_au_weather-is_today_sunny_or_cloudy-3","gemma-3-27b-it","weather,question","Is today sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['cloudy']" +"urban_loft_au_weather-is_today_sunny_or_cloudy-4","gemma-3-27b-it","weather,question","Is today sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['cloudy']" +"urban_loft_au_weather-is_today_sunny_or_cloudy-5","gemma-3-27b-it","weather,question","Is today sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['cloudy']" +"urban_loft_au_weather-is_today_sunny_or_cloudy-6","gemma-3-27b-it","weather,question","Is today sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['cloudy']" +"urban_loft_au_weather-is_today_sunny_or_cloudy-7","gemma-3-27b-it","weather,question","Is today sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['cloudy']" +"urban_loft_au_weather-is_today_sunny_or_cloudy-8","gemma-3-27b-it","weather,question","Is today sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['cloudy']" +"urban_loft_au_weather-is_today_sunny_or_cloudy-9","gemma-3-27b-it","weather,question","Is today sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['cloudy']" +"urban_loft_au_weather-what_is_the_temperature_outside-0","gemma-3-27b-it","weather,question","What is the temperature outside?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['30']" +"urban_loft_au_weather-what_is_the_temperature_outside-1","gemma-3-27b-it","weather,question","What is the temperature outside?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['30']" +"urban_loft_au_weather-what_is_the_temperature_outside-2","gemma-3-27b-it","weather,question","What is the temperature outside?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['30']" +"urban_loft_au_weather-what_is_the_temperature_outside-3","gemma-3-27b-it","weather,question","What is the temperature outside?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['30']" +"urban_loft_au_weather-what_is_the_temperature_outside-4","gemma-3-27b-it","weather,question","What is the temperature outside?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['30']" +"urban_loft_au_weather-what_is_the_temperature_outside-5","gemma-3-27b-it","weather,question","What is the temperature outside?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['30']" +"urban_loft_au_weather-what_is_the_temperature_outside-6","gemma-3-27b-it","weather,question","What is the temperature outside?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['30']" +"urban_loft_au_weather-what_is_the_temperature_outside-7","gemma-3-27b-it","weather,question","What is the temperature outside?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['30']" +"urban_loft_au_weather-what_is_the_temperature_outside-8","gemma-3-27b-it","weather,question","What is the temperature outside?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['30']" +"urban_loft_au_weather-what_is_the_temperature_outside-9","gemma-3-27b-it","weather,question","What is the temperature outside?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['30']" +"urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-0","gemma-3-27b-it","weather,question","Will the weather today be sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['sunny']" +"urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-1","gemma-3-27b-it","weather,question","Will the weather today be sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['sunny']" +"urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-2","gemma-3-27b-it","weather,question","Will the weather today be sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['sunny']" +"urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-3","gemma-3-27b-it","weather,question","Will the weather today be sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['sunny']" +"urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-4","gemma-3-27b-it","weather,question","Will the weather today be sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['sunny']" +"urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-5","gemma-3-27b-it","weather,question","Will the weather today be sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['sunny']" +"urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-6","gemma-3-27b-it","weather,question","Will the weather today be sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['sunny']" +"urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-7","gemma-3-27b-it","weather,question","Will the weather today be sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['sunny']" +"urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-8","gemma-3-27b-it","weather,question","Will the weather today be sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['sunny']" +"urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-9","gemma-3-27b-it","weather,question","Will the weather today be sunny or cloudy?","","Error talking to API","eval-test_expect_response","Bad","ValueError: Response 'Error talking to API' did not contain any of ['sunny']" diff --git a/reports/questions/2026.2.2/reports-by-category.yaml b/reports/questions/2026.2.2/reports-by-category.yaml new file mode 100644 index 000000000..6a92b1ca1 --- /dev/null +++ b/reports/questions/2026.2.2/reports-by-category.yaml @@ -0,0 +1,52 @@ +--- +- category: calendar + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 130 +- category: climate + good_percent: 50.0% + confidence_interval: 21.9% + good: 10 + total: 20 +- category: light + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 50 +- category: media-player + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 20 +- category: question + good_percent: 2.7% + confidence_interval: 1.7% + good: 10 + total: 370 +- category: sensor + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 30 +- category: switch + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 20 +- category: todo + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 50 +- category: valve + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 20 +- category: weather + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 30 + diff --git a/reports/questions/2026.2.2/reports-by-model-category.yaml b/reports/questions/2026.2.2/reports-by-model-category.yaml new file mode 100644 index 000000000..117f170f9 --- /dev/null +++ b/reports/questions/2026.2.2/reports-by-model-category.yaml @@ -0,0 +1,52 @@ +--- +- model_id-category: gemma-3-27b-it-calendar + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 130 +- model_id-category: gemma-3-27b-it-climate + good_percent: 50.0% + confidence_interval: 21.9% + good: 10 + total: 20 +- model_id-category: gemma-3-27b-it-light + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 50 +- model_id-category: gemma-3-27b-it-media-player + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 20 +- model_id-category: gemma-3-27b-it-question + good_percent: 2.7% + confidence_interval: 1.7% + good: 10 + total: 370 +- model_id-category: gemma-3-27b-it-sensor + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 30 +- model_id-category: gemma-3-27b-it-switch + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 20 +- model_id-category: gemma-3-27b-it-todo + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 50 +- model_id-category: gemma-3-27b-it-valve + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 20 +- model_id-category: gemma-3-27b-it-weather + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 30 + diff --git a/reports/questions/2026.2.2/reports-by-model-test-name.yaml b/reports/questions/2026.2.2/reports-by-model-test-name.yaml new file mode 100644 index 000000000..191e9f2dd --- /dev/null +++ b/reports/questions/2026.2.2/reports-by-model-test-name.yaml @@ -0,0 +1,22 @@ +--- +- model_id-task_name: gemma-3-27b-it-eval-test_expect_llm_tool_call_args + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- model_id-task_name: gemma-3-27b-it-eval-test_expect_llm_tool_call_name + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- model_id-task_name: gemma-3-27b-it-eval-test_expect_response + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 350 +- model_id-task_name: gemma-3-27b-it-eval-test_expected_states + good_percent: 50.0% + confidence_interval: 21.9% + good: 10 + total: 20 + diff --git a/reports/questions/2026.2.2/reports-by-task-id.yaml b/reports/questions/2026.2.2/reports-by-task-id.yaml new file mode 100644 index 000000000..af44c1a9b --- /dev/null +++ b/reports/questions/2026.2.2/reports-by-task-id.yaml @@ -0,0 +1,1852 @@ +--- +- task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_climate-if_the_temperature_in_the_guest_room_is_above_23_then_set_the_target_temperature_to_22-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-0 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-1 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-2 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-3 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-4 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-5 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-6 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-7 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-8 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: suburban_familiy_home_be_climate-set_the_target_temperature_to_22_in_the_guest_room_if_it_is_above_23-9 + good_percent: 100.0% + confidence_interval: 0.0% + good: 1 + total: 1 +- task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_sensor-is_the_kitchen_sink_battery_low-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_kitchen_plug_on-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_switch-is_the_plug_in_the_kitchen_on-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-are_the_sprinklers_on-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: suburban_familiy_home_be_valve-please_tell_me_are_the_sprinklers_on-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_calendar_is_anyone_visiting-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_are_we_leaving_the_house_today-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_do_i_have_any_events_away_from_home_today-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-according_to_my_personal_calendar_who_am_i_meeting_for_dinner-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-am_i_leaving_the_house_today-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-are_we_leaving_the_house_today-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-do_i_have_any_personal_calendar_events_away_from_home_today-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-from_my_personal_calendar_how_many_nights_do_i_need_to_cook_this_week-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-how_many_nights_this_week_do_i_need_to_cook-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-is_anyone_coming_to_visit-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_are_on_my_personal_calendar_today-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_do_i_have_today-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_do_i_have_today-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_do_i_have_today-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_do_i_have_today-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_do_i_have_today-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_do_i_have_today-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_do_i_have_today-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_do_i_have_today-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_do_i_have_today-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-what_classes_do_i_have_today-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_calendar-who_am_i_meeting_for_dinner-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-how_many_lights_are_currently_on-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-how_many_lights_are_currently_on-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-how_many_lights_are_currently_on-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-how_many_lights_are_currently_on-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-how_many_lights_are_currently_on-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-how_many_lights_are_currently_on-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-how_many_lights_are_currently_on-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-how_many_lights_are_currently_on-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-how_many_lights_are_currently_on-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-how_many_lights_are_currently_on-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_off_please_answer_with_yes_or_no-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_light-is_the_bedroom_light_on_please_answer_yes_or_no-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_living_room_media_player_playing_paused_or_stopped-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_media_player-is_the_media_player_playing_paused_or_stopped-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_battery_level_of_the_motion_sensor-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_sensor-what_is_the_motion_sensor_battery_level-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-how_many_items_are_on_my_task_list-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_chores_around_the_house_do_i_need_to_complete-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-what_s_is_on_my_task_list_to_buy_at_the_grocery_store-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_do_i_need_to_call-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_do_i_need_to_call-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_do_i_need_to_call-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_do_i_need_to_call-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_do_i_need_to_call-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_do_i_need_to_call-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_do_i_need_to_call-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_do_i_need_to_call-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_do_i_need_to_call-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_do_i_need_to_call-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_todo-who_is_on_my_task_list_to_call-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-is_today_sunny_or_cloudy-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-what_is_the_temperature_outside-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-what_is_the_temperature_outside-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-what_is_the_temperature_outside-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-what_is_the_temperature_outside-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-what_is_the_temperature_outside-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-what_is_the_temperature_outside-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-what_is_the_temperature_outside-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-what_is_the_temperature_outside-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-what_is_the_temperature_outside-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-what_is_the_temperature_outside-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-0 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-1 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-2 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-3 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-4 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-5 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-6 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-7 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-8 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 +- task_id: urban_loft_au_weather-will_the_weather_today_be_sunny_or_cloudy-9 + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 1 + diff --git a/reports/questions/2026.2.2/reports-by-test-name.yaml b/reports/questions/2026.2.2/reports-by-test-name.yaml new file mode 100644 index 000000000..5e25f6e86 --- /dev/null +++ b/reports/questions/2026.2.2/reports-by-test-name.yaml @@ -0,0 +1,22 @@ +--- +- task_name: eval-test_expect_llm_tool_call_args + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- task_name: eval-test_expect_llm_tool_call_name + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 0 +- task_name: eval-test_expect_response + good_percent: 0.0% + confidence_interval: 0.0% + good: 0 + total: 350 +- task_name: eval-test_expected_states + good_percent: 50.0% + confidence_interval: 21.9% + good: 10 + total: 20 + diff --git a/reports/questions/2026.2.2/reports-token-stats.yaml b/reports/questions/2026.2.2/reports-token-stats.yaml new file mode 100644 index 000000000..63b69c89f --- /dev/null +++ b/reports/questions/2026.2.2/reports-token-stats.yaml @@ -0,0 +1,2 @@ +--- [] + diff --git a/reports/questions/2026.2.2/reports.yaml b/reports/questions/2026.2.2/reports.yaml new file mode 100644 index 000000000..9c2441cd4 --- /dev/null +++ b/reports/questions/2026.2.2/reports.yaml @@ -0,0 +1,7 @@ +--- +- model_id: gemma-3-27b-it + good_percent: 2.7% + confidence_interval: 1.7% + good: 10 + total: 370 +